#ifndef _FFT_IL_SOURCE_H_
#define _FFT_IL_SOURCE_H_

namespace amdspl
{
    namespace fft
    {

// AMD IL compute-shader text (il_cs_2_0, 64 threads per group) for the kernel
// whose transform routine is labelled FFT8 in the embedded IL comments.
//
// Per thread, as written below:
//   1. r70.x = (flat group id << 9) + flat thread-in-group id
//   2. func 5 loads r400..r407 from g[r70.x + 64*k], k = 0..7
//   3. func 4 transforms r400..r407 in place
//   4. func 6 writes them back to the same eight offsets, permuted
//
// func 0 multiplies the (x,y) and (z,w) halves of a register independently as
// complex numbers, so each register is treated as two (re, im) pairs.
// NOTE(review): g[] is presumably the global buffer bound by the host code;
// its layout and size are not visible in this file - confirm at the call site.
static const char* _fft8_tomo_fft_source_ = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
// Literal pool. l0 is declared but never referenced by this kernel.
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 1.121038771e-44f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000008 \n"
// l1 = (0, 1, -1, 0.7071...) : source of the constant multipliers used below.
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
// l29.x = 9 is the shift applied to the group id (i.e. 512 elements per group).
"; l29 = (1.261168618e-44f, 7.174648137e-43f, 0.0f, 0.0f, ) \n"
"dcl_literal l29, 0x00000009, 0x00000200, 0x00000000, 0x00000000 \n"
// main: compute the base index, then load / transform / store.
"ishl r70.x___, vThreadGrpIdFlat0.x, l29.x \n"
"iadd r70.x___, vTidInGrpFlat0.x, r70.x \n"
"call 5 \n"
"call 4 \n"
"call 6 \n"
"endmain \n"
// func 0: r40 <- r40 * r41 as a complex product, done separately on the
// (x,y) pair and the (z,w) pair. Clobbers r100 and r101.
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
// func 2: butterfly. r50 <- r50 + r51, r51 <- (old r50) - r51. Clobbers r100.
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
// func 3: 4-register stage on r60..r63. Butterflies (r60,r62) and (r61,r63),
// multiplies r63 by (0,-1) through func 0, then butterflies (r60,r61) and
// (r62,r63).
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
// func 4: 8-register stage on r400..r407.
//   - four butterflies pairing r40k with r40(k+4), k = 0..3
//   - r405 *= (1,-1) * 0.7071, r406 *= (0,-1), r407 *= (-1,-1) * 0.7071
//   - func 3 on r400..r403, then func 3 on r404..r407
";FFT8. \n"
"func 4 \n"
"mov r50, r400 \n"
"mov r51, r404 \n"
"call 2 \n"
"mov r400, r50 \n"
"mov r404, r51 \n"
"mov r50, r401 \n"
"mov r51, r405 \n"
"call 2 \n"
"mov r401, r50 \n"
"mov r405, r51 \n"
"mov r50, r402 \n"
"mov r51, r406 \n"
"call 2 \n"
"mov r402, r50 \n"
"mov r406, r51 \n"
"mov r50, r403 \n"
"mov r51, r407 \n"
"call 2 \n"
"mov r403, r50 \n"
"mov r407, r51 \n"
"mov r40, r405 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r405, r40 \n"
"mul_ieee r405, r405, l1.w \n"
"mov r40, r406 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r407, r40 \n"
"mul_ieee r407, r407, l1.w \n"
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"mov r60, r404 \n"
"mov r61, r405 \n"
"mov r62, r406 \n"
"mov r63, r407 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r405, r61 \n"
"mov r406, r62 \n"
"mov r407, r63 \n"
"ret \n"
"endfunc \n"
// func 5: load r400..r407 from g[r70.x + 64*k], k = 0..7.
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+64] \n"
"mov r402, g[r70.x+128] \n"
"mov r403, g[r70.x+192] \n"
"mov r404, g[r70.x+256] \n"
"mov r405, g[r70.x+320] \n"
"mov r406, g[r70.x+384] \n"
"mov r407, g[r70.x+448] \n"
"ret \n"
"endfunc \n"
// func 6: store to the same eight offsets. Offset slot k receives register
// r40(j) with j = 0,4,2,6,1,5,3,7, i.e. the 3-bit bit-reversal of k.
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+64], r404 \n"
"mov g[r70.x+128], r402 \n"
"mov g[r70.x+192], r406 \n"
"mov g[r70.x+256], r401 \n"
"mov g[r70.x+320], r405 \n"
"mov g[r70.x+384], r403 \n"
"mov g[r70.x+448], r407 \n"
"ret \n"
"endfunc \n"
"end \n";

// AMD IL compute-shader text (il_cs_2_0, 64 threads per group) for the kernel
// whose transform routine is labelled FFT16 in the embedded IL comments.
//
// Per thread, as written below:
//   1. r70.x = (flat group id << 10) + flat thread-in-group id
//      r70.y = r70.x + 512
//   2. func 5 loads r400..r407 from g[r70.x + 64*k] and r408..r415 from
//      g[r70.y + 64*k], k = 0..7
//   3. func 4 transforms r400..r415 in place
//   4. func 6 writes them back to the same sixteen offsets, permuted
//
// func 0 multiplies the (x,y) and (z,w) halves of a register independently as
// complex numbers, so each register is treated as two (re, im) pairs.
// NOTE(review): g[] is presumably the global buffer bound by the host code;
// its layout and size are not visible in this file - confirm at the call site.
static const char* _fft16_tomo_fft_source_ = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
// Literal pool. l0 is declared but never referenced by this kernel.
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.242077543e-44f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000010 \n"
// l1 = (0, 1, -1, 0.7071...) and l2 = (+/-0.3827, +/-0.9239) supply the
// constant complex multipliers used in func 3 and func 4.
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l2 = (0.3826834261f, 0.9238795042f, -0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l2, 0x3EC3EF15, 0x3F6C835E, 0xBEC3EF15, 0xBF6C835E \n"
// l29.y = 512 is the offset between the two load/store bases.
"; l29 = (1.261168618e-44f, 7.174648137e-43f, 0.0f, 0.0f, ) \n"
"dcl_literal l29, 0x00000009, 0x00000200, 0x00000000, 0x00000000 \n"
// l30.x = 10 is the shift applied to the group id (1024 elements per group).
"; l30 = (1.401298464e-44f, 1.434929627e-42f, 0.0f, 0.0f, ) \n"
"dcl_literal l30, 0x0000000A, 0x00000400, 0x00000000, 0x00000000 \n"
// main: compute both base indices, then load / transform / store.
"ishl r70.x___, vThreadGrpIdFlat0.x, l30.x \n"
"iadd r70.x___, vTidInGrpFlat0.x, r70.x \n"
"iadd r70._y__, r70.x, l29.y \n"
"call 5 \n"
"call 4 \n"
"call 6 \n"
"endmain \n"
// func 0: r40 <- r40 * r41 as a complex product, done separately on the
// (x,y) pair and the (z,w) pair. Clobbers r100 and r101.
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
// func 2: butterfly. r50 <- r50 + r51, r51 <- (old r50) - r51. Clobbers r100.
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
// func 3: 4-register stage on r60..r63. Butterflies (r60,r62) and (r61,r63),
// multiplies r63 by (0,-1) through func 0, then butterflies (r60,r61) and
// (r62,r63).
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
// func 4: 16-register stage on r400..r415.
//   - func 3 on each stride-4 group (r40j, r40(j+4), r40(j+8), r40(j+12)),
//     j = 0..3
//   - constant complex multiplies on r405..r407, r409..r411 and r413..r415
//     (r400..r404, r408 and r412 are left unscaled)
//   - func 3 on each contiguous group of four registers
";FFT16 \n"
"func 4 \n"
"mov r60, r400 \n"
"mov r61, r404 \n"
"mov r62, r408 \n"
"mov r63, r412 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r404, r61 \n"
"mov r408, r62 \n"
"mov r412, r63 \n"
"mov r60, r401 \n"
"mov r61, r405 \n"
"mov r62, r409 \n"
"mov r63, r413 \n"
"call 3 \n"
"mov r401, r60 \n"
"mov r405, r61 \n"
"mov r409, r62 \n"
"mov r413, r63 \n"
"mov r60, r402 \n"
"mov r61, r406 \n"
"mov r62, r410 \n"
"mov r63, r414 \n"
"call 3 \n"
"mov r402, r60 \n"
"mov r406, r61 \n"
"mov r410, r62 \n"
"mov r414, r63 \n"
"mov r60, r403 \n"
"mov r61, r407 \n"
"mov r62, r411 \n"
"mov r63, r415 \n"
"call 3 \n"
"mov r403, r60 \n"
"mov r407, r61 \n"
"mov r411, r62 \n"
"mov r415, r63 \n"
// Constant multiplies between the two passes. Where a multiply by l1.w
// follows, the (1,-1) or (-1,-1) factor is scaled by 0.7071.
"mov r40, r405 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r405, r40 \n"
"mul_ieee r405, r405, l1.w \n"
"mov r40, r406 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r407, r40 \n"
"mul_ieee r407, r407, l1.w \n"
"mov r40, r409 \n"
"mov r41, l2.yzyz \n"
"call 0 \n"
"mov r409, r40 \n"
"mov r40, r410 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r410, r40 \n"
"mul_ieee r410, r410, l1.w \n"
"mov r40, r411 \n"
"mov r41, l2.xwxw \n"
"call 0 \n"
"mov r411, r40 \n"
"mov r40, r413 \n"
"mov r41, l2.xwxw \n"
"call 0 \n"
"mov r413, r40 \n"
"mov r40, r414 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r414, r40 \n"
"mul_ieee r414, r414, l1.w \n"
"mov r40, r415 \n"
"mov r41, l2.wxwx \n"
"call 0 \n"
"mov r415, r40 \n"
// Second pass: func 3 on each contiguous group of four registers.
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"mov r60, r404 \n"
"mov r61, r405 \n"
"mov r62, r406 \n"
"mov r63, r407 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r405, r61 \n"
"mov r406, r62 \n"
"mov r407, r63 \n"
"mov r60, r408 \n"
"mov r61, r409 \n"
"mov r62, r410 \n"
"mov r63, r411 \n"
"call 3 \n"
"mov r408, r60 \n"
"mov r409, r61 \n"
"mov r410, r62 \n"
"mov r411, r63 \n"
"mov r60, r412 \n"
"mov r61, r413 \n"
"mov r62, r414 \n"
"mov r63, r415 \n"
"call 3 \n"
"mov r412, r60 \n"
"mov r413, r61 \n"
"mov r414, r62 \n"
"mov r415, r63 \n"
"ret \n"
"endfunc \n"
// func 5: load r400..r407 from base r70.x and r408..r415 from base r70.y,
// each at offsets 64*k, k = 0..7.
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+64] \n"
"mov r402, g[r70.x+128] \n"
"mov r403, g[r70.x+192] \n"
"mov r404, g[r70.x+256] \n"
"mov r405, g[r70.x+320] \n"
"mov r406, g[r70.x+384] \n"
"mov r407, g[r70.x+448] \n"
"mov r408, g[r70.y+0] \n"
"mov r409, g[r70.y+64] \n"
"mov r410, g[r70.y+128] \n"
"mov r411, g[r70.y+192] \n"
"mov r412, g[r70.y+256] \n"
"mov r413, g[r70.y+320] \n"
"mov r414, g[r70.y+384] \n"
"mov r415, g[r70.y+448] \n"
"ret \n"
"endfunc \n"
// func 6: store to the same sixteen offsets. Counting slots 0..15 in the
// order written below, slot k receives r40(j) where j is the 4-bit
// bit-reversal of k.
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+64], r408 \n"
"mov g[r70.x+128], r404 \n"
"mov g[r70.x+192], r412 \n"
"mov g[r70.x+256], r402 \n"
"mov g[r70.x+320], r410 \n"
"mov g[r70.x+384], r406 \n"
"mov g[r70.x+448], r414 \n"
"mov g[r70.y+0], r401 \n"
"mov g[r70.y+64], r409 \n"
"mov g[r70.y+128], r405 \n"
"mov g[r70.y+192], r413 \n"
"mov g[r70.y+256], r403 \n"
"mov g[r70.y+320], r411 \n"
"mov g[r70.y+384], r407 \n"
"mov g[r70.y+448], r415 \n"
"ret \n"
"endfunc \n"
"end \n";

// AMD IL compute-shader text (il_cs_2_0, 64 threads per group) for the kernel
// whose transform routine is labelled FFT32 in the embedded IL comments.
//
// Per thread, as written below:
//   1. r90.x = flat group id << 11; r80.x = r90.x + flat thread-in-group id
//   2. func 6 loads r400..r431: four runs of eight registers, each run read at
//      offsets 64*k (k = 0..7), the run base advancing by 512 each time
//   3. func 5 transforms r400..r431 in place
//   4. r80.x is recomputed as r90.x + (thread-in-group id << 5)
//   5. func 7 stores the 32 registers to 32 consecutive elements starting at
//      the new r80.x, permuted
//
// Unlike the load, the store is contiguous per thread (32 elements each).
// func 0 multiplies the (x,y) and (z,w) halves of a register independently as
// complex numbers, so each register is treated as two (re, im) pairs.
// NOTE(review): g[] is presumably the global buffer bound by the host code;
// its layout and size are not visible in this file - confirm at the call site.
static const char* _fft32_tomo_fft_source_ = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
// Literal pool. l0 is declared but never referenced by this kernel.
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.242077543e-44f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000010 \n"
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
// l10..l19 each pack two (re, im) constants, one in .xy and one in .zw.
// func 5 feeds them to func 0 as .xyxy or .zwzw so the same factor multiplies
// both halves of the target register.
"; l10 = (0.9238795042f, -0.3826834261f, 0.7071067691f, -0.7071067691f, ) \n"
"dcl_literal l10, 0x3F6C835E, 0xBEC3EF15, 0x3F3504F3, 0xBF3504F3 \n"
"; l11 = (0.3826834261f, -0.9238795042f, -0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l11, 0x3EC3EF15, 0xBF6C835E, 0xBEC3EF15, 0xBF6C835E \n"
"; l12 = (-0.7071067691f, -0.7071067691f, -0.9238795042f, -0.3826834261f, ) \n"
"dcl_literal l12, 0xBF3504F3, 0xBF3504F3, 0xBF6C835E, 0xBEC3EF15 \n"
"; l13 = (0.9807852507f, -0.1950903237f, 0.9238795042f, -0.3826834261f, ) \n"
"dcl_literal l13, 0x3F7B14BE, 0xBE47C5C2, 0x3F6C835E, 0xBEC3EF15 \n"
"; l14 = (0.8314695954f, -0.5555702448f, 0.7071067691f, -0.7071067691f, ) \n"
"dcl_literal l14, 0x3F54DB31, 0xBF0E39DA, 0x3F3504F3, 0xBF3504F3 \n"
"; l15 = (0.5555702448f, -0.8314695954f, 0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l15, 0x3F0E39DA, 0xBF54DB31, 0x3EC3EF15, 0xBF6C835E \n"
"; l16 = (0.1950903237f, -0.9807852507f, 0.8314695954f, -0.5555702448f, ) \n"
"dcl_literal l16, 0x3E47C5C2, 0xBF7B14BE, 0x3F54DB31, 0xBF0E39DA \n"
"; l17 = (0.3826834261f, -0.9238795042f, -0.1950903237f, -0.9807852507f, ) \n"
"dcl_literal l17, 0x3EC3EF15, 0xBF6C835E, 0xBE47C5C2, 0xBF7B14BE \n"
"; l18 = (-0.7071067691f, -0.7071067691f, -0.9807852507f, -0.1950903237f, ) \n"
"dcl_literal l18, 0xBF3504F3, 0xBF3504F3, 0xBF7B14BE, 0xBE47C5C2 \n"
"; l19 = (-0.9238795042f, 0.3826834261f, -0.5555702448f, 0.8314695954f, ) \n"
"dcl_literal l19, 0xBF6C835E, 0x3EC3EF15, 0xBF0E39DA, 0x3F54DB31 \n"
// l30 = (11, 5, 512, 8): group-id shift, thread-id shift for the store base,
// and the per-run base increment used by func 6. l30.w is not referenced.
"; l30 = (1.541428311e-44f, 7.006492322e-45f, 7.174648137e-43f, 1.121038771e-44f, ) \n"
"dcl_literal l30, 0x0000000B, 0x00000005, 0x00000200, 0x00000008 \n"
// main: strided load (func 6), transform (func 5), then recompute r80.x for
// the contiguous per-thread store (func 7).
"ishl r90.x___, vThreadGrpIdFlat0.x, l30.x \n"
"iadd r80.x___, vTidInGrpFlat0.x, r90.x \n"
"call 6 \n"
"call 5 \n"
"ishl r90._y__, vTidInGrpFlat0.x, l30.y \n"
"iadd r80.x___, r90.x, r90.y \n"
"call 7 \n"
"endmain \n"
// func 0: r40 <- r40 * r41 as a complex product, done separately on the
// (x,y) pair and the (z,w) pair. Clobbers r100 and r101.
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
// func 2: butterfly. r50 <- r50 + r51, r51 <- (old r50) - r51. Clobbers r100.
";FFT2 \n"
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
// func 3: 4-register stage on r60..r63. Butterflies (r60,r62) and (r61,r63),
// multiplies r63 by (0,-1) through func 0, then butterflies (r60,r61) and
// (r62,r63).
";FFT4 \n"
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
// func 4: 8-register stage on the scratch registers r70..r77 (not r400..r407
// as in the smaller kernels).
//   - four butterflies pairing r7k with r7(k+4), k = 0..3
//   - r75 *= (1,-1) * 0.7071, r76 *= (0,-1), r77 *= (-1,-1) * 0.7071
//   - func 3 on r70..r73, then func 3 on r74..r77
";FFT8 \n"
"func 4 \n"
"mov r50, r70 \n"
"mov r51, r74 \n"
"call 2 \n"
"mov r70, r50 \n"
"mov r74, r51 \n"
"mov r50, r71 \n"
"mov r51, r75 \n"
"call 2 \n"
"mov r71, r50 \n"
"mov r75, r51 \n"
"mov r50, r72 \n"
"mov r51, r76 \n"
"call 2 \n"
"mov r72, r50 \n"
"mov r76, r51 \n"
"mov r50, r73 \n"
"mov r51, r77 \n"
"call 2 \n"
"mov r73, r50 \n"
"mov r77, r51 \n"
"mov r40, r75 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r75, r40 \n"
"mul_ieee r75, r75, l1.w \n"
"mov r40, r76 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r76, r40 \n"
"mov r40, r77 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r77, r40 \n"
"mul_ieee r77, r77, l1.w \n"
"mov r60, r70 \n"
"mov r61, r71 \n"
"mov r62, r72 \n"
"mov r63, r73 \n"
"call 3 \n"
"mov r70, r60 \n"
"mov r71, r61 \n"
"mov r72, r62 \n"
"mov r73, r63 \n"
"mov r60, r74 \n"
"mov r61, r75 \n"
"mov r62, r76 \n"
"mov r63, r77 \n"
"call 3 \n"
"mov r74, r60 \n"
"mov r75, r61 \n"
"mov r76, r62 \n"
"mov r77, r63 \n"
"ret \n"
"endfunc \n"
// func 5: 32-register stage on r400..r431.
//   - func 3 on each stride-8 group (r40j, r40(j+8), r40(j+16), r40(j+24)),
//     j = 0..7
//   - constant complex multiplies on r409..r415, r417..r423 and r425..r431
//     (r400..r408, r416 and r424 are left unscaled)
//   - func 4 on each contiguous run of eight registers, staged via r70..r77
";FFT32 \n"
"func 5 \n"
"mov r60, r400 \n"
"mov r61, r408 \n"
"mov r62, r416 \n"
"mov r63, r424 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r408, r61 \n"
"mov r416, r62 \n"
"mov r424, r63 \n"
"mov r60, r401 \n"
"mov r61, r409 \n"
"mov r62, r417 \n"
"mov r63, r425 \n"
"call 3 \n"
"mov r401, r60 \n"
"mov r409, r61 \n"
"mov r417, r62 \n"
"mov r425, r63 \n"
"mov r60, r402 \n"
"mov r61, r410 \n"
"mov r62, r418 \n"
"mov r63, r426 \n"
"call 3 \n"
"mov r402, r60 \n"
"mov r410, r61 \n"
"mov r418, r62 \n"
"mov r426, r63 \n"
"mov r60, r403 \n"
"mov r61, r411 \n"
"mov r62, r419 \n"
"mov r63, r427 \n"
"call 3 \n"
"mov r403, r60 \n"
"mov r411, r61 \n"
"mov r419, r62 \n"
"mov r427, r63 \n"
"mov r60, r404 \n"
"mov r61, r412 \n"
"mov r62, r420 \n"
"mov r63, r428 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r412, r61 \n"
"mov r420, r62 \n"
"mov r428, r63 \n"
"mov r60, r405 \n"
"mov r61, r413 \n"
"mov r62, r421 \n"
"mov r63, r429 \n"
"call 3 \n"
"mov r405, r60 \n"
"mov r413, r61 \n"
"mov r421, r62 \n"
"mov r429, r63 \n"
"mov r60, r406 \n"
"mov r61, r414 \n"
"mov r62, r422 \n"
"mov r63, r430 \n"
"call 3 \n"
"mov r406, r60 \n"
"mov r414, r61 \n"
"mov r422, r62 \n"
"mov r430, r63 \n"
"mov r60, r407 \n"
"mov r61, r415 \n"
"mov r62, r423 \n"
"mov r63, r431 \n"
"call 3 \n"
"mov r407, r60 \n"
"mov r415, r61 \n"
"mov r423, r62 \n"
"mov r431, r63 \n"
// Constant multiplies: r409..r415 use l10..l12 (with (0,-1) for r412),
// r417..r423 use l13..l16.xy, r425..r431 use l16.zw..l19.
"mov r40, r409 \n"
"mov r41, l10.xyxy \n"
"call 0 \n"
"mov r409, r40 \n"
"mov r40, r410 \n"
"mov r41, l10.zwzw \n"
"call 0 \n"
"mov r410, r40 \n"
"mov r40, r411 \n"
"mov r41, l11.xyxy \n"
"call 0 \n"
"mov r411, r40 \n"
"mov r40, r412 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r412, r40 \n"
"mov r40, r413 \n"
"mov r41, l11.zwzw \n"
"call 0 \n"
"mov r413, r40 \n"
"mov r40, r414 \n"
"mov r41, l12.xyxy \n"
"call 0 \n"
"mov r414, r40 \n"
"mov r40, r415 \n"
"mov r41, l12.zwzw \n"
"call 0 \n"
"mov r415, r40 \n"
"mov r40, r417 \n"
"mov r41, l13.xyxy \n"
"call 0 \n"
"mov r417, r40 \n"
"mov r40, r418 \n"
"mov r41, l13.zwzw \n"
"call 0 \n"
"mov r418, r40 \n"
"mov r40, r419 \n"
"mov r41, l14.xyxy \n"
"call 0 \n"
"mov r419, r40 \n"
"mov r40, r420 \n"
"mov r41, l14.zwzw \n"
"call 0 \n"
"mov r420, r40 \n"
"mov r40, r421 \n"
"mov r41, l15.xyxy \n"
"call 0 \n"
"mov r421, r40 \n"
"mov r40, r422 \n"
"mov r41, l15.zwzw \n"
"call 0 \n"
"mov r422, r40 \n"
"mov r40, r423 \n"
"mov r41, l16.xyxy \n"
"call 0 \n"
"mov r423, r40 \n"
"mov r40, r425 \n"
"mov r41, l16.zwzw \n"
"call 0 \n"
"mov r425, r40 \n"
"mov r40, r426 \n"
"mov r41, l17.xyxy \n"
"call 0 \n"
"mov r426, r40 \n"
"mov r40, r427 \n"
"mov r41, l17.zwzw \n"
"call 0 \n"
"mov r427, r40 \n"
"mov r40, r428 \n"
"mov r41, l18.xyxy \n"
"call 0 \n"
"mov r428, r40 \n"
"mov r40, r429 \n"
"mov r41, l18.zwzw \n"
"call 0 \n"
"mov r429, r40 \n"
"mov r40, r430 \n"
"mov r41, l19.xyxy \n"
"call 0 \n"
"mov r430, r40 \n"
"mov r40, r431 \n"
"mov r41, l19.zwzw \n"
"call 0 \n"
"mov r431, r40 \n"
// Second pass: copy each run of eight into r70..r77, run func 4, copy back.
"mov r70, r400 \n"
"mov r71, r401 \n"
"mov r72, r402 \n"
"mov r73, r403 \n"
"mov r74, r404 \n"
"mov r75, r405 \n"
"mov r76, r406 \n"
"mov r77, r407 \n"
"call 4 \n"
"mov r400, r70 \n"
"mov r401, r71 \n"
"mov r402, r72 \n"
"mov r403, r73 \n"
"mov r404, r74 \n"
"mov r405, r75 \n"
"mov r406, r76 \n"
"mov r407, r77 \n"
"mov r70, r408 \n"
"mov r71, r409 \n"
"mov r72, r410 \n"
"mov r73, r411 \n"
"mov r74, r412 \n"
"mov r75, r413 \n"
"mov r76, r414 \n"
"mov r77, r415 \n"
"call 4 \n"
"mov r408, r70 \n"
"mov r409, r71 \n"
"mov r410, r72 \n"
"mov r411, r73 \n"
"mov r412, r74 \n"
"mov r413, r75 \n"
"mov r414, r76 \n"
"mov r415, r77 \n"
"mov r70, r416 \n"
"mov r71, r417 \n"
"mov r72, r418 \n"
"mov r73, r419 \n"
"mov r74, r420 \n"
"mov r75, r421 \n"
"mov r76, r422 \n"
"mov r77, r423 \n"
"call 4 \n"
"mov r416, r70 \n"
"mov r417, r71 \n"
"mov r418, r72 \n"
"mov r419, r73 \n"
"mov r420, r74 \n"
"mov r421, r75 \n"
"mov r422, r76 \n"
"mov r423, r77 \n"
"mov r70, r424 \n"
"mov r71, r425 \n"
"mov r72, r426 \n"
"mov r73, r427 \n"
"mov r74, r428 \n"
"mov r75, r429 \n"
"mov r76, r430 \n"
"mov r77, r431 \n"
"call 4 \n"
"mov r424, r70 \n"
"mov r425, r71 \n"
"mov r426, r72 \n"
"mov r427, r73 \n"
"mov r428, r74 \n"
"mov r429, r75 \n"
"mov r430, r76 \n"
"mov r431, r77 \n"
"ret \n"
"endfunc \n"
// func 6: load r400..r431. r80.y starts as a copy of r80.x and is advanced by
// l30.z (512) after each run of eight loads at offsets 64*k, k = 0..7.
"func 6 \n"
"mov r80._y__, r80.x \n"
"mov r400, g[r80.y+0] \n"
"mov r401, g[r80.y+64] \n"
"mov r402, g[r80.y+128] \n"
"mov r403, g[r80.y+192] \n"
"mov r404, g[r80.y+256] \n"
"mov r405, g[r80.y+320] \n"
"mov r406, g[r80.y+384] \n"
"mov r407, g[r80.y+448] \n"
"iadd r80._y__, r80.y, l30.z \n"
"mov r408, g[r80.y+0] \n"
"mov r409, g[r80.y+64] \n"
"mov r410, g[r80.y+128] \n"
"mov r411, g[r80.y+192] \n"
"mov r412, g[r80.y+256] \n"
"mov r413, g[r80.y+320] \n"
"mov r414, g[r80.y+384] \n"
"mov r415, g[r80.y+448] \n"
"iadd r80._y__, r80.y, l30.z \n"
"mov r416, g[r80.y+0] \n"
"mov r417, g[r80.y+64] \n"
"mov r418, g[r80.y+128] \n"
"mov r419, g[r80.y+192] \n"
"mov r420, g[r80.y+256] \n"
"mov r421, g[r80.y+320] \n"
"mov r422, g[r80.y+384] \n"
"mov r423, g[r80.y+448] \n"
"iadd r80._y__, r80.y, l30.z \n"
"mov r424, g[r80.y+0] \n"
"mov r425, g[r80.y+64] \n"
"mov r426, g[r80.y+128] \n"
"mov r427, g[r80.y+192] \n"
"mov r428, g[r80.y+256] \n"
"mov r429, g[r80.y+320] \n"
"mov r430, g[r80.y+384] \n"
"mov r431, g[r80.y+448] \n"
"ret \n"
"endfunc \n"
// func 7: store to g[r80.x + k], k = 0..31. Slot k receives r40(j) where j is
// the 5-bit bit-reversal of k.
"func 7 \n"
"mov g[r80.x+0], r400 \n"
"mov g[r80.x+1], r416 \n"
"mov g[r80.x+2], r408 \n"
"mov g[r80.x+3], r424 \n"
"mov g[r80.x+4], r404 \n"
"mov g[r80.x+5], r420 \n"
"mov g[r80.x+6], r412 \n"
"mov g[r80.x+7], r428 \n"
"mov g[r80.x+8], r402 \n"
"mov g[r80.x+9], r418 \n"
"mov g[r80.x+10], r410 \n"
"mov g[r80.x+11], r426 \n"
"mov g[r80.x+12], r406 \n"
"mov g[r80.x+13], r422 \n"
"mov g[r80.x+14], r414 \n"
"mov g[r80.x+15], r430 \n"
"mov g[r80.x+16], r401 \n"
"mov g[r80.x+17], r417 \n"
"mov g[r80.x+18], r409 \n"
"mov g[r80.x+19], r425 \n"
"mov g[r80.x+20], r405 \n"
"mov g[r80.x+21], r421 \n"
"mov g[r80.x+22], r413 \n"
"mov g[r80.x+23], r429 \n"
"mov g[r80.x+24], r403 \n"
"mov g[r80.x+25], r419 \n"
"mov g[r80.x+26], r411 \n"
"mov g[r80.x+27], r427 \n"
"mov g[r80.x+28], r407 \n"
"mov g[r80.x+29], r423 \n"
"mov g[r80.x+30], r415 \n"
"mov g[r80.x+31], r431 \n"
"ret \n"
"endfunc \n"
"end \n";


// AMD IL compute-shader text (il_cs_2_0, 64 threads per group, LDS declared
// with size-per-thread 8 and sharing mode _wavefrontRel). Unlike the smaller
// kernels, this one exchanges data between threads through the LDS.
//
// Per thread, as written below:
//   1. r70.x = (flat group id << 9) + flat thread-in-group id
//   2. func 5 loads r400..r407 from g[r70.x + 64*k], k = 0..7
//   3. func 4 (labelled FFT8) transforms r400..r407 in place
//   4. func 7 multiplies r401..r407 by cos/sin factors whose angle depends on
//      (thread-in-group id & 7) / 64
//   5. for each of the four components x, y, z, w: func 8n writes that
//      component of all eight registers to the LDS, func 9n reads two vectors
//      back with lds_read_vec_neighborExch into the same component
//   6. func 4 runs again, then func 6 stores the permuted result
//
// func 0 multiplies the (x,y) and (z,w) halves of a register independently as
// complex numbers, so each register is treated as two (re, im) pairs.
// NOTE(review): the name suggests a 64-point transform split across threads;
// how the neighborExch addressing maps threads to points is not derivable
// from this file alone - confirm against the AMD IL specification.
static const char* _fft64_tomo_fft_source_ = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
"dcl_lds_size_per_thread 8 \n"
"dcl_lds_sharing_mode _wavefrontRel \n"
// Literal pool. Of these, only l1, l5, l10, l11, l23, l24 and l29 are
// referenced by the instructions below.
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.802596929e-45f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000002 \n"
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l2 = (0.3826834261f, 0.9238795042f, -0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l2, 0x3EC3EF15, 0x3F6C835E, 0xBEC3EF15, 0xBF6C835E \n"
// l5.x = 64.0 is the divisor applied to (thread id & 7) in main.
"; l5 = (64.0f, 0.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l5, 0x42800000, 0x00000000, 0x00000000, 0x00000000 \n"
// l10 and l11 hold -2*pi*m for m = (0, 4, 2, 6) and (1, 5, 3, 7); func 7
// scales them to obtain the per-register angles.
"; l10 = (0.0f, -25.13274193f, -12.56637096f, -37.69911194f, ) \n"
"dcl_literal l10, 0x80000000, 0xC1C90FDB, 0xC1490FDB, 0xC216CBE4 \n"
"; l11 = (-6.283185482f, -31.41592598f, -18.84955597f, -43.98229599f, ) \n"
"dcl_literal l11, 0xC0C90FDB, 0xC1FB53D1, 0xC196CBE4, 0xC22FEDDF \n"
"; l20 = (0.0f, 1.401298464e-45f, -1.#QNANf, 0.0f, ) \n"
"dcl_literal l20, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000000 \n"
"; l21 = (1.401298464e-45f, 2.802596929e-45f, -1.#QNANf, 1.401298464e-45f, ) \n"
"dcl_literal l21, 0x00000001, 0x00000002, 0xFFFFFFFE, 0x00000001 \n"
"; l22 = (2.802596929e-45f, 5.605193857e-45f, -1.#QNANf, 4.203895393e-45f, ) \n"
"dcl_literal l22, 0x00000002, 0x00000004, 0xFFFFFFFC, 0x00000003 \n"
// l23.w = 7 is the mask applied to the thread id before the twiddle step.
"; l23 = (4.203895393e-45f, 1.121038771e-44f, -1.#QNANf, 9.809089250e-45f, ) \n"
"dcl_literal l23, 0x00000003, 0x00000008, 0xFFFFFFF8, 0x00000007 \n"
// l24.x = 4 is used both as a bit mask in main and as the r75.x increment
// between the two LDS reads in func 91..94.
"; l24 = (5.605193857e-45f, 2.242077543e-44f, -1.#QNANf, 2.101947696e-44f, ) \n"
"dcl_literal l24, 0x00000004, 0x00000010, 0xFFFFFFF0, 0x0000000F \n"
"; l25 = (7.006492322e-45f, 4.484155086e-44f, -1.#QNANf, 4.344025239e-44f, ) \n"
"dcl_literal l25, 0x00000005, 0x00000020, 0xFFFFFFE0, 0x0000001F \n"
// l29.x = 9 is the shift applied to the group id (512 elements per group).
"; l29 = (1.261168618e-44f, 7.174648137e-43f, 0.0f, 0.0f, ) \n"
"dcl_literal l29, 0x00000009, 0x00000200, 0x00000000, 0x00000000 \n"
"; l30 = (1.401298464e-44f, 1.434929627e-42f, 0.0f, 0.0f, ) \n"
"dcl_literal l30, 0x0000000A, 0x00000400, 0x00000000, 0x00000000 \n"
// main: base index, load, first func 4 pass.
"ishl r70.x___, vThreadGrpIdFlat0.x, l29.x \n"
"iadd r70.x___, vTidInGrpFlat0.x, r70.x \n"
"call 5 \n"
"call 4 \n"
// r80 = float(thread id & 7) / 64, broadcast to all components, then the
// twiddle multiply in func 7.
"and r0.x___, vTidInGrpFlat0.x, l23.w \n"
"itof r80.x___, r0.x \n"
"div_zeroop(fltmax) r80, r80.x, l5.x \n"
"call 7 \n"
// Four LDS exchange rounds, one per register component. Before each read
// the address pair is rebuilt because func 9n modifies r75.x:
//   r75.x = thread id & ~4,  r75.y = thread id & 4
"call 81 \n"
"inot r75.x___, l24.x \n"
"and r75.x___, vTidInGrpFlat0.x, r75.x \n"
"and r75._y__, vTidInGrpFlat0.x, l24.x \n"
"call 91 \n"
"call 82 \n"
"inot r75.x___, l24.x \n"
"and r75.x___, vTidInGrpFlat0.x, r75.x \n"
"and r75._y__, vTidInGrpFlat0.x, l24.x \n"
"call 92 \n"
"call 83 \n"
"inot r75.x___, l24.x \n"
"and r75.x___, vTidInGrpFlat0.x, r75.x \n"
"and r75._y__, vTidInGrpFlat0.x, l24.x \n"
"call 93 \n"
"call 84 \n"
"inot r75.x___, l24.x \n"
"and r75.x___, vTidInGrpFlat0.x, r75.x \n"
"and r75._y__, vTidInGrpFlat0.x, l24.x \n"
"call 94 \n"
// Second func 4 pass on the exchanged data, then the permuted store.
"call 4 \n"
"call 6 \n"
"endmain \n"
// func 7: twiddle multiply. Angles are r80 * l10 and r80 * l11; cos goes to
// the (x,z) lanes and sin to the (y,w) lanes of r41, so both halves of each
// register get the same factor. r400 is not multiplied (its angle is zero).
"func 7 \n"
"mul_ieee r100, r80, l10 \n"
"mul_ieee r101, r80, l11 \n"
"cos_vec r110, r100 \n"
"cos_vec r111, r101 \n"
"sin_vec r120, r100 \n"
"sin_vec r121, r101 \n"
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
"mov r40, r404 \n"
"mov r41.x_z_, r111.x \n"
"mov r41._y_w, r121.x \n"
"call 0 \n"
"mov r404, r40 \n"
"mov r40, r405 \n"
"mov r41.x_z_, r111.y \n"
"mov r41._y_w, r121.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mov r40, r406 \n"
"mov r41.x_z_, r111.z \n"
"mov r41._y_w, r121.z \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41.x_z_, r111.w \n"
"mov r41._y_w, r121.w \n"
"call 0 \n"
"mov r407, r40 \n"
"ret \n"
"endfunc \n"
// func 81: gather the .x component of r400,r404,r402,r406 into r500 and of
// r401,r405,r403,r407 into r501, write r500 and then r501 (lOffset 4) to the
// LDS, and fence.
"func 81 \n"
"mov r500.x___, r400.x \n"
"mov r500._y__, r404.x \n"
"mov r500.__z_, r402.x \n"
"mov r500.___w, r406.x \n"
"mov r501.x___, r401.x \n"
"mov r501._y__, r405.x \n"
"mov r501.__z_, r403.x \n"
"mov r501.___w, r407.x \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
// func 91: read two vectors with lds_read_vec_neighborExch (r75.x advanced by
// 4 between the reads) and scatter them into the .x component of r400..r407.
"func 91 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"mov r400.x___, r500.x \n"
"mov r401.x___, r500.y \n"
"mov r402.x___, r500.z \n"
"mov r403.x___, r500.w \n"
"mov r404.x___, r501.x \n"
"mov r405.x___, r501.y \n"
"mov r406.x___, r501.z \n"
"mov r407.x___, r501.w \n"
"ret \n"
"endfunc \n"
// func 82: same as func 81, for the .y component.
"func 82 \n"
"mov r500.x___, r400.y \n"
"mov r500._y__, r404.y \n"
"mov r500.__z_, r402.y \n"
"mov r500.___w, r406.y \n"
"mov r501.x___, r401.y \n"
"mov r501._y__, r405.y \n"
"mov r501.__z_, r403.y \n"
"mov r501.___w, r407.y \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
// func 92: same as func 91, writing the .y component.
"func 92 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"mov r400._y__, r500.x \n"
"mov r401._y__, r500.y \n"
"mov r402._y__, r500.z \n"
"mov r403._y__, r500.w \n"
"mov r404._y__, r501.x \n"
"mov r405._y__, r501.y \n"
"mov r406._y__, r501.z \n"
"mov r407._y__, r501.w \n"
"ret \n"
"endfunc \n"
// func 83: same as func 81, for the .z component.
"func 83 \n"
"mov r500.x___, r400.z \n"
"mov r500._y__, r404.z \n"
"mov r500.__z_, r402.z \n"
"mov r500.___w, r406.z \n"
"mov r501.x___, r401.z \n"
"mov r501._y__, r405.z \n"
"mov r501.__z_, r403.z \n"
"mov r501.___w, r407.z \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
// func 93: same as func 91, writing the .z component.
"func 93 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"mov r400.__z_, r500.x \n"
"mov r401.__z_, r500.y \n"
"mov r402.__z_, r500.z \n"
"mov r403.__z_, r500.w \n"
"mov r404.__z_, r501.x \n"
"mov r405.__z_, r501.y \n"
"mov r406.__z_, r501.z \n"
"mov r407.__z_, r501.w \n"
"ret \n"
"endfunc \n"
// func 84: same as func 81, for the .w component.
"func 84 \n"
"mov r500.x___, r400.w \n"
"mov r500._y__, r404.w \n"
"mov r500.__z_, r402.w \n"
"mov r500.___w, r406.w \n"
"mov r501.x___, r401.w \n"
"mov r501._y__, r405.w \n"
"mov r501.__z_, r403.w \n"
"mov r501.___w, r407.w \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
// func 94: same as func 91, writing the .w component.
"func 94 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"mov r400.___w, r500.x \n"
"mov r401.___w, r500.y \n"
"mov r402.___w, r500.z \n"
"mov r403.___w, r500.w \n"
"mov r404.___w, r501.x \n"
"mov r405.___w, r501.y \n"
"mov r406.___w, r501.z \n"
"mov r407.___w, r501.w \n"
"ret \n"
"endfunc \n"
// func 0: r40 <- r40 * r41 as a complex product, done separately on the
// (x,y) pair and the (z,w) pair. Clobbers r100 and r101.
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
// func 2: butterfly. r50 <- r50 + r51, r51 <- (old r50) - r51. Clobbers r100.
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
// func 3: 4-register stage on r60..r63. Butterflies (r60,r62) and (r61,r63),
// multiplies r63 by (0,-1) through func 0, then butterflies (r60,r61) and
// (r62,r63).
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
// func 4: 8-register stage on r400..r407.
//   - four butterflies pairing r40k with r40(k+4), k = 0..3
//   - r405 *= (1,-1) * 0.7071, r406 *= (0,-1), r407 *= (-1,-1) * 0.7071
//   - func 3 on r400..r403, then func 3 on r404..r407
";FFT8. \n"
"func 4 \n"
"mov r50, r400 \n"
"mov r51, r404 \n"
"call 2 \n"
"mov r400, r50 \n"
"mov r404, r51 \n"
"mov r50, r401 \n"
"mov r51, r405 \n"
"call 2 \n"
"mov r401, r50 \n"
"mov r405, r51 \n"
"mov r50, r402 \n"
"mov r51, r406 \n"
"call 2 \n"
"mov r402, r50 \n"
"mov r406, r51 \n"
"mov r50, r403 \n"
"mov r51, r407 \n"
"call 2 \n"
"mov r403, r50 \n"
"mov r407, r51 \n"
"mov r40, r405 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r405, r40 \n"
"mul_ieee r405, r405, l1.w \n"
"mov r40, r406 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r407, r40 \n"
"mul_ieee r407, r407, l1.w \n"
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"mov r60, r404 \n"
"mov r61, r405 \n"
"mov r62, r406 \n"
"mov r63, r407 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r405, r61 \n"
"mov r406, r62 \n"
"mov r407, r63 \n"
"ret \n"
"endfunc \n"
// func 5: load r400..r407 from g[r70.x + 64*k], k = 0..7.
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+64] \n"
"mov r402, g[r70.x+128] \n"
"mov r403, g[r70.x+192] \n"
"mov r404, g[r70.x+256] \n"
"mov r405, g[r70.x+320] \n"
"mov r406, g[r70.x+384] \n"
"mov r407, g[r70.x+448] \n"
"ret \n"
"endfunc \n"
// func 6: store to the same eight offsets. Offset slot k receives register
// r40(j) with j = 0,4,2,6,1,5,3,7, i.e. the 3-bit bit-reversal of k.
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+64], r404 \n"
"mov g[r70.x+128], r402 \n"
"mov g[r70.x+192], r406 \n"
"mov g[r70.x+256], r401 \n"
"mov g[r70.x+320], r405 \n"
"mov g[r70.x+384], r403 \n"
"mov g[r70.x+448], r407 \n"
"ret \n"
"endfunc \n"
"end \n";

static const char* _fft256_tomo_fft_source_ = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
"dcl_lds_size_per_thread 16 \n"
"dcl_lds_sharing_mode _wavefrontRel \n"
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.802596929e-45f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000002 \n"
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l2 = (0.3826834261f, 0.9238795042f, -0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l2, 0x3EC3EF15, 0x3F6C835E, 0xBEC3EF15, 0xBF6C835E \n"
"; l3 = (-1.#QNANf, 1.681558157e-44f, 5.605193857e-45f, 1.121038771e-44f, ) \n"
"dcl_literal l3, 0xFFFFFFF3, 0x0000000C, 0x00000004, 0x00000008 \n"
"; l5 = (256.0f, 0.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l5, 0x43800000, 0x00000000, 0x00000000, 0x00000000 \n"
"; l10 = (0.0f, -50.26548386f, -25.13274193f, -75.39822388f, ) \n"
"dcl_literal l10, 0x80000000, 0xC2490FDB, 0xC1C90FDB, 0xC296CBE4 \n"
"; l11 = (-12.56637096f, -62.83185196f, -37.69911194f, -87.96459198f, ) \n"
"dcl_literal l11, 0xC1490FDB, 0xC27B53D1, 0xC216CBE4, 0xC2AFEDDF \n"
"; l12 = (-6.283185482f, -56.54866791f, -31.41592598f, -81.68141174f, ) \n"
"dcl_literal l12, 0xC0C90FDB, 0xC26231D6, 0xC1FB53D1, 0xC2A35CE2 \n"
"; l13 = (-18.84955597f, -69.11503601f, -43.98229599f, -94.24777985f, ) \n"
"dcl_literal l13, 0xC196CBE4, 0xC28A3AE6, 0xC22FEDDF, 0xC2BC7EDD \n"
"; l20 = (0.0f, 1.401298464e-45f, -1.#QNANf, 0.0f, ) \n"
"dcl_literal l20, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000000 \n"
"; l21 = (1.401298464e-45f, 2.802596929e-45f, -1.#QNANf, 1.401298464e-45f, ) \n"
"dcl_literal l21, 0x00000001, 0x00000002, 0xFFFFFFFE, 0x00000001 \n"
"; l22 = (2.802596929e-45f, 5.605193857e-45f, -1.#QNANf, 4.203895393e-45f, ) \n"
"dcl_literal l22, 0x00000002, 0x00000004, 0xFFFFFFFC, 0x00000003 \n"
"; l23 = (4.203895393e-45f, 1.121038771e-44f, -1.#QNANf, 9.809089250e-45f, ) \n"
"dcl_literal l23, 0x00000003, 0x00000008, 0xFFFFFFF8, 0x00000007 \n"
"; l24 = (5.605193857e-45f, 2.242077543e-44f, -1.#QNANf, 2.101947696e-44f, ) \n"
"dcl_literal l24, 0x00000004, 0x00000010, 0xFFFFFFF0, 0x0000000F \n"
"; l25 = (7.006492322e-45f, 4.484155086e-44f, -1.#QNANf, 4.344025239e-44f, ) \n"
"dcl_literal l25, 0x00000005, 0x00000020, 0xFFFFFFE0, 0x0000001F \n"
"; l29 = (1.261168618e-44f, 7.174648137e-43f, 0.0f, 0.0f, ) \n"
"dcl_literal l29, 0x00000009, 0x00000200, 0x00000000, 0x00000000 \n"
"; l30 = (1.401298464e-44f, 1.434929627e-42f, 0.0f, 0.0f, ) \n"
"dcl_literal l30, 0x0000000A, 0x00000400, 0x00000000, 0x00000000 \n"
"ishl r70.x___, vThreadGrpIdFlat0.x, l30.x \n"
"iadd r70.x___, vTidInGrpFlat0.x, r70.x \n"
"iadd r70._y__, r70.x, l29.y \n"
"call 5 \n"
"call 4 \n"
"and r0.x___, vTidInGrpFlat0.x, l24.w \n"
"itof r80.x___, r0.x \n"
"div_zeroop(fltmax) r80, r80.x, l5.x \n"
"call 7 \n"
"call 81 \n"
"and r75.x___, vTidInGrpFlat0.x, l3.x \n"
"and r75._y__, vTidInGrpFlat0.x, l3.y \n"
"call 91 \n"
"call 82 \n"
"and r75.x___, vTidInGrpFlat0.x, l3.x \n"
"and r75._y__, vTidInGrpFlat0.x, l3.y \n"
"call 92 \n"
"call 83 \n"
"and r75.x___, vTidInGrpFlat0.x, l3.x \n"
"and r75._y__, vTidInGrpFlat0.x, l3.y \n"
"call 93 \n"
"call 84 \n"
"and r75.x___, vTidInGrpFlat0.x, l3.x \n"
"and r75._y__, vTidInGrpFlat0.x, l3.y \n"
"call 94 \n"
"call 4 \n"
"call 6 \n"
"endmain \n"
"func 7 \n"
"mul_ieee r100, r80.x, l10 \n"
"mul_ieee r101, r80.x, l11 \n"
"mul_ieee r102, r80.x, l12 \n"
"mul_ieee r103, r80.x, l13 \n"
"cos_vec r110._yzw, r100 \n"
"cos_vec r111, r101 \n"
"cos_vec r112, r102 \n"
"cos_vec r113, r103 \n"
"sin_vec r120._yzw, r100 \n"
"sin_vec r121, r101 \n"
"sin_vec r122, r102 \n"
"sin_vec r123, r103 \n"
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
"mov r40, r404 \n"
"mov r41.x_z_, r111.x \n"
"mov r41._y_w, r121.x \n"
"call 0 \n"
"mov r404, r40 \n"
"mov r40, r405 \n"
"mov r41.x_z_, r111.y \n"
"mov r41._y_w, r121.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mov r40, r406 \n"
"mov r41.x_z_, r111.z \n"
"mov r41._y_w, r121.z \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41.x_z_, r111.w \n"
"mov r41._y_w, r121.w \n"
"call 0 \n"
"mov r407, r40 \n"
"mov r40, r408 \n"
"mov r41.x_z_, r112.x \n"
"mov r41._y_w, r122.x \n"
"call 0 \n"
"mov r408, r40 \n"
"mov r40, r409 \n"
"mov r41.x_z_, r112.y \n"
"mov r41._y_w, r122.y \n"
"call 0 \n"
"mov r409, r40 \n"
"mov r40, r410 \n"
"mov r41.x_z_, r112.z \n"
"mov r41._y_w, r122.z \n"
"call 0 \n"
"mov r410, r40 \n"
"mov r40, r411 \n"
"mov r41.x_z_, r112.w \n"
"mov r41._y_w, r122.w \n"
"call 0 \n"
"mov r411, r40 \n"
"mov r40, r412 \n"
"mov r41.x_z_, r113.x \n"
"mov r41._y_w, r123.x \n"
"call 0 \n"
"mov r412, r40 \n"
"mov r40, r413 \n"
"mov r41.x_z_, r113.y \n"
"mov r41._y_w, r123.y \n"
"call 0 \n"
"mov r413, r40 \n"
"mov r40, r414 \n"
"mov r41.x_z_, r113.z \n"
"mov r41._y_w, r123.z \n"
"call 0 \n"
"mov r414, r40 \n"
"mov r40, r415 \n"
"mov r41.x_z_, r113.w \n"
"mov r41._y_w, r123.w \n"
"call 0 \n"
"mov r415, r40 \n"
"ret \n"
"endfunc \n"
"func 81 \n"
"mov r500.x___, r400.x \n"
"mov r500._y__, r408.x \n"
"mov r500.__z_, r404.x \n"
"mov r500.___w, r412.x \n"
"mov r501.x___, r402.x \n"
"mov r501._y__, r410.x \n"
"mov r501.__z_, r406.x \n"
"mov r501.___w, r414.x \n"
"mov r502.x___, r401.x \n"
"mov r502._y__, r409.x \n"
"mov r502.__z_, r405.x \n"
"mov r502.___w, r413.x \n"
"mov r503.x___, r403.x \n"
"mov r503._y__, r411.x \n"
"mov r503.__z_, r407.x \n"
"mov r503.___w, r415.x \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
"func 91 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r502, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r503, r75.xyyy \n"
"mov r400.x___, r500.x \n"
"mov r401.x___, r500.y \n"
"mov r402.x___, r500.z \n"
"mov r403.x___, r500.w \n"
"mov r404.x___, r501.x \n"
"mov r405.x___, r501.y \n"
"mov r406.x___, r501.z \n"
"mov r407.x___, r501.w \n"
"mov r408.x___, r502.x \n"
"mov r409.x___, r502.y \n"
"mov r410.x___, r502.z \n"
"mov r411.x___, r502.w \n"
"mov r412.x___, r503.x \n"
"mov r413.x___, r503.y \n"
"mov r414.x___, r503.z \n"
"mov r415.x___, r503.w \n"
"ret \n"
"endfunc \n"
"func 82 \n"
"mov r500.x___, r400.y \n"
"mov r500._y__, r408.y \n"
"mov r500.__z_, r404.y \n"
"mov r500.___w, r412.y \n"
"mov r501.x___, r402.y \n"
"mov r501._y__, r410.y \n"
"mov r501.__z_, r406.y \n"
"mov r501.___w, r414.y \n"
"mov r502.x___, r401.y \n"
"mov r502._y__, r409.y \n"
"mov r502.__z_, r405.y \n"
"mov r502.___w, r413.y \n"
"mov r503.x___, r403.y \n"
"mov r503._y__, r411.y \n"
"mov r503.__z_, r407.y \n"
"mov r503.___w, r415.y \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
"func 92 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r502, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r503, r75.xyyy \n"
"mov r400._y__, r500.x \n"
"mov r401._y__, r500.y \n"
"mov r402._y__, r500.z \n"
"mov r403._y__, r500.w \n"
"mov r404._y__, r501.x \n"
"mov r405._y__, r501.y \n"
"mov r406._y__, r501.z \n"
"mov r407._y__, r501.w \n"
"mov r408._y__, r502.x \n"
"mov r409._y__, r502.y \n"
"mov r410._y__, r502.z \n"
"mov r411._y__, r502.w \n"
"mov r412._y__, r503.x \n"
"mov r413._y__, r503.y \n"
"mov r414._y__, r503.z \n"
"mov r415._y__, r503.w \n"
"ret \n"
"endfunc \n"
"func 83 \n"
"mov r500.x___, r400.z \n"
"mov r500._y__, r408.z \n"
"mov r500.__z_, r404.z \n"
"mov r500.___w, r412.z \n"
"mov r501.x___, r402.z \n"
"mov r501._y__, r410.z \n"
"mov r501.__z_, r406.z \n"
"mov r501.___w, r414.z \n"
"mov r502.x___, r401.z \n"
"mov r502._y__, r409.z \n"
"mov r502.__z_, r405.z \n"
"mov r502.___w, r413.z \n"
"mov r503.x___, r403.z \n"
"mov r503._y__, r411.z \n"
"mov r503.__z_, r407.z \n"
"mov r503.___w, r415.z \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
"func 93 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r502, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r503, r75.xyyy \n"
"mov r400.__z_, r500.x \n"
"mov r401.__z_, r500.y \n"
"mov r402.__z_, r500.z \n"
"mov r403.__z_, r500.w \n"
"mov r404.__z_, r501.x \n"
"mov r405.__z_, r501.y \n"
"mov r406.__z_, r501.z \n"
"mov r407.__z_, r501.w \n"
"mov r408.__z_, r502.x \n"
"mov r409.__z_, r502.y \n"
"mov r410.__z_, r502.z \n"
"mov r411.__z_, r502.w \n"
"mov r412.__z_, r503.x \n"
"mov r413.__z_, r503.y \n"
"mov r414.__z_, r503.z \n"
"mov r415.__z_, r503.w \n"
"ret \n"
"endfunc \n"
"func 84 \n"
"mov r500.x___, r400.w \n"
"mov r500._y__, r408.w \n"
"mov r500.__z_, r404.w \n"
"mov r500.___w, r412.w \n"
"mov r501.x___, r402.w \n"
"mov r501._y__, r410.w \n"
"mov r501.__z_, r406.w \n"
"mov r501.___w, r414.w \n"
"mov r502.x___, r401.w \n"
"mov r502._y__, r409.w \n"
"mov r502.__z_, r405.w \n"
"mov r502.___w, r413.w \n"
"mov r503.x___, r403.w \n"
"mov r503._y__, r411.w \n"
"mov r503.__z_, r407.w \n"
"mov r503.___w, r415.w \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
"func 94 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r502, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r503, r75.xyyy \n"
"mov r400.___w, r500.x \n"
"mov r401.___w, r500.y \n"
"mov r402.___w, r500.z \n"
"mov r403.___w, r500.w \n"
"mov r404.___w, r501.x \n"
"mov r405.___w, r501.y \n"
"mov r406.___w, r501.z \n"
"mov r407.___w, r501.w \n"
"mov r408.___w, r502.x \n"
"mov r409.___w, r502.y \n"
"mov r410.___w, r502.z \n"
"mov r411.___w, r502.w \n"
"mov r412.___w, r503.x \n"
"mov r413.___w, r503.y \n"
"mov r414.___w, r503.z \n"
"mov r415.___w, r503.w \n"
"ret \n"
"endfunc \n"
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
";FFT16 \n"
"func 4 \n"
"mov r60, r400 \n"
"mov r61, r404 \n"
"mov r62, r408 \n"
"mov r63, r412 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r404, r61 \n"
"mov r408, r62 \n"
"mov r412, r63 \n"
"mov r60, r401 \n"
"mov r61, r405 \n"
"mov r62, r409 \n"
"mov r63, r413 \n"
"call 3 \n"
"mov r401, r60 \n"
"mov r405, r61 \n"
"mov r409, r62 \n"
"mov r413, r63 \n"
"mov r60, r402 \n"
"mov r61, r406 \n"
"mov r62, r410 \n"
"mov r63, r414 \n"
"call 3 \n"
"mov r402, r60 \n"
"mov r406, r61 \n"
"mov r410, r62 \n"
"mov r414, r63 \n"
"mov r60, r403 \n"
"mov r61, r407 \n"
"mov r62, r411 \n"
"mov r63, r415 \n"
"call 3 \n"
"mov r403, r60 \n"
"mov r407, r61 \n"
"mov r411, r62 \n"
"mov r415, r63 \n"
"mov r40, r405 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r405, r40 \n"
"mul_ieee r405, r405, l1.w \n"
"mov r40, r406 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r407, r40 \n"
"mul_ieee r407, r407, l1.w \n"
"mov r40, r409 \n"
"mov r41, l2.yzyz \n"
"call 0 \n"
"mov r409, r40 \n"
"mov r40, r410 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r410, r40 \n"
"mul_ieee r410, r410, l1.w \n"
"mov r40, r411 \n"
"mov r41, l2.xwxw \n"
"call 0 \n"
"mov r411, r40 \n"
"mov r40, r413 \n"
"mov r41, l2.xwxw \n"
"call 0 \n"
"mov r413, r40 \n"
"mov r40, r414 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r414, r40 \n"
"mul_ieee r414, r414, l1.w \n"
"mov r40, r415 \n"
"mov r41, l2.wxwx \n"
"call 0 \n"
"mov r415, r40 \n"
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"mov r60, r404 \n"
"mov r61, r405 \n"
"mov r62, r406 \n"
"mov r63, r407 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r405, r61 \n"
"mov r406, r62 \n"
"mov r407, r63 \n"
"mov r60, r408 \n"
"mov r61, r409 \n"
"mov r62, r410 \n"
"mov r63, r411 \n"
"call 3 \n"
"mov r408, r60 \n"
"mov r409, r61 \n"
"mov r410, r62 \n"
"mov r411, r63 \n"
"mov r60, r412 \n"
"mov r61, r413 \n"
"mov r62, r414 \n"
"mov r63, r415 \n"
"call 3 \n"
"mov r412, r60 \n"
"mov r413, r61 \n"
"mov r414, r62 \n"
"mov r415, r63 \n"
"ret \n"
"endfunc \n"
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+64] \n"
"mov r402, g[r70.x+128] \n"
"mov r403, g[r70.x+192] \n"
"mov r404, g[r70.x+256] \n"
"mov r405, g[r70.x+320] \n"
"mov r406, g[r70.x+384] \n"
"mov r407, g[r70.x+448] \n"
"mov r408, g[r70.y+0] \n"
"mov r409, g[r70.y+64] \n"
"mov r410, g[r70.y+128] \n"
"mov r411, g[r70.y+192] \n"
"mov r412, g[r70.y+256] \n"
"mov r413, g[r70.y+320] \n"
"mov r414, g[r70.y+384] \n"
"mov r415, g[r70.y+448] \n"
"ret \n"
"endfunc \n"
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+64], r408 \n"
"mov g[r70.x+128], r404 \n"
"mov g[r70.x+192], r412 \n"
"mov g[r70.x+256], r402 \n"
"mov g[r70.x+320], r410 \n"
"mov g[r70.x+384], r406 \n"
"mov g[r70.x+448], r414 \n"
"mov g[r70.y+0], r401 \n"
"mov g[r70.y+64], r409 \n"
"mov g[r70.y+128], r405 \n"
"mov g[r70.y+192], r413 \n"
"mov g[r70.y+256], r403 \n"
"mov g[r70.y+320], r411 \n"
"mov g[r70.y+384], r407 \n"
"mov g[r70.y+448], r415 \n"
"ret \n"
"endfunc \n"
"end \n";

static const char* _fft512_tomo_fft_source_ =
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
"dcl_lds_size_per_thread 32 \n"
"dcl_lds_sharing_mode _wavefrontRel \n"
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.242077543e-44f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000010 \n"
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l2 = (0.3826834261f, 0.9238795042f, -0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l2, 0x3EC3EF15, 0x3F6C835E, 0xBEC3EF15, 0xBF6C835E \n"
"; l3 = (-1.#QNANf, 1.681558157e-44f, 5.605193857e-45f, 1.121038771e-44f, ) \n"
"dcl_literal l3, 0xFFFFFFF3, 0x0000000C, 0x00000004, 0x00000008 \n"
"; l5 = (512.0f, 0.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l5, 0x44000000, 0x00000000, 0x00000000, 0x00000000 \n"
"; l10 = (0.9238795042f, -0.3826834261f, 0.7071067691f, -0.7071067691f, ) \n"
"dcl_literal l10, 0x3F6C835E, 0xBEC3EF15, 0x3F3504F3, 0xBF3504F3 \n"
"; l11 = (0.3826834261f, -0.9238795042f, -0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l11, 0x3EC3EF15, 0xBF6C835E, 0xBEC3EF15, 0xBF6C835E \n"
"; l12 = (-0.7071067691f, -0.7071067691f, -0.9238795042f, -0.3826834261f, ) \n"
"dcl_literal l12, 0xBF3504F3, 0xBF3504F3, 0xBF6C835E, 0xBEC3EF15 \n"
"; l13 = (0.9807852507f, -0.1950903237f, 0.9238795042f, -0.3826834261f, ) \n"
"dcl_literal l13, 0x3F7B14BE, 0xBE47C5C2, 0x3F6C835E, 0xBEC3EF15 \n"
"; l14 = (0.8314695954f, -0.5555702448f, 0.7071067691f, -0.7071067691f, ) \n"
"dcl_literal l14, 0x3F54DB31, 0xBF0E39DA, 0x3F3504F3, 0xBF3504F3 \n"
"; l15 = (0.5555702448f, -0.8314695954f, 0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l15, 0x3F0E39DA, 0xBF54DB31, 0x3EC3EF15, 0xBF6C835E \n"
"; l16 = (0.1950903237f, -0.9807852507f, 0.8314695954f, -0.5555702448f, ) \n"
"dcl_literal l16, 0x3E47C5C2, 0xBF7B14BE, 0x3F54DB31, 0xBF0E39DA \n"
"; l17 = (0.3826834261f, -0.9238795042f, -0.1950903237f, -0.9807852507f, ) \n"
"dcl_literal l17, 0x3EC3EF15, 0xBF6C835E, 0xBE47C5C2, 0xBF7B14BE \n"
"; l18 = (-0.7071067691f, -0.7071067691f, -0.9807852507f, -0.1950903237f, ) \n"
"dcl_literal l18, 0xBF3504F3, 0xBF3504F3, 0xBF7B14BE, 0xBE47C5C2 \n"
"; l19 = (-0.9238795042f, 0.3826834261f, -0.5555702448f, 0.8314695954f, ) \n"
"dcl_literal l19, 0xBF6C835E, 0x3EC3EF15, 0xBF0E39DA, 0x3F54DB31 \n"
"; l40 = (0.0f, -100.5309677f, -50.26548386f, -150.7964478f, ) \n"
"dcl_literal l40, 0x80000000, 0xC2C90FDB, 0xC2490FDB, 0xC316CBE4 \n"
"; l41 = (-25.13274193f, -125.6637039f, -75.39822388f, -175.929184f, ) \n"
"dcl_literal l41, 0xC1C90FDB, 0xC2FB53D1, 0xC296CBE4, 0xC32FEDDF \n"
"; l42 = (-12.56637096f, -113.0973358f, -62.83185196f, -163.3628235f, ) \n"
"dcl_literal l42, 0xC1490FDB, 0xC2E231D6, 0xC27B53D1, 0xC3235CE2 \n"
"; l43 = (-37.69911194f, -138.230072f, -87.96459198f, -188.4955597f, ) \n"
"dcl_literal l43, 0xC216CBE4, 0xC30A3AE6, 0xC2AFEDDF, 0xC33C7EDD \n"
"; l44 = (-6.283185482f, -106.8141479f, -56.54866791f, -157.0796356f, ) \n"
"dcl_literal l44, 0xC0C90FDB, 0xC2D5A0D8, 0xC26231D6, 0xC31D1463 \n"
"; l45 = (-31.41592598f, -131.9468842f, -81.68141174f, -182.2123718f, ) \n"
"dcl_literal l45, 0xC1FB53D1, 0xC303F267, 0xC2A35CE2, 0xC336365E \n"
"; l46 = (-18.84955597f, -119.3805237f, -69.11503601f, -169.6459961f, ) \n"
"dcl_literal l46, 0xC196CBE4, 0xC2EEC2D4, 0xC28A3AE6, 0xC329A560 \n"
"; l47 = (-43.98229599f, -144.5132599f, -94.24777985f, -194.7787476f, ) \n"
"dcl_literal l47, 0xC22FEDDF, 0xC3108365, 0xC2BC7EDD, 0xC342C75C \n"
"; l20 = (0.0f, 1.401298464e-45f, -1.#QNANf, 0.0f, ) \n"
"dcl_literal l20, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000000 \n"
"; l21 = (1.401298464e-45f, 2.802596929e-45f, -1.#QNANf, 1.401298464e-45f, ) \n"
"dcl_literal l21, 0x00000001, 0x00000002, 0xFFFFFFFE, 0x00000001 \n"
"; l22 = (2.802596929e-45f, 5.605193857e-45f, -1.#QNANf, 4.203895393e-45f, ) \n"
"dcl_literal l22, 0x00000002, 0x00000004, 0xFFFFFFFC, 0x00000003 \n"
"; l23 = (4.203895393e-45f, 1.121038771e-44f, -1.#QNANf, 9.809089250e-45f, ) \n"
"dcl_literal l23, 0x00000003, 0x00000008, 0xFFFFFFF8, 0x00000007 \n"
"; l24 = (5.605193857e-45f, 2.242077543e-44f, -1.#QNANf, 2.101947696e-44f, ) \n"
"dcl_literal l24, 0x00000004, 0x00000010, 0xFFFFFFF0, 0x0000000F \n"
"; l25 = (7.006492322e-45f, 4.484155086e-44f, -1.#QNANf, 4.344025239e-44f, ) \n"
"dcl_literal l25, 0x00000005, 0x00000020, 0xFFFFFFE0, 0x0000001F \n"
"; l30 = (1.541428311e-44f, 7.006492322e-45f, 8.968310172e-44f, 7.174648137e-43f, ) \n"
"dcl_literal l30, 0x0000000B, 0x00000005, 0x00000040, 0x00000200 \n"
"ishl r90.x___, vThreadGrpIdFlat0.x, l30.x \n"
"call 11 \n"
"call 6 \n"
"and r0.x___, vTidInGrpFlat0.x, l24.w \n"
"itof r200.x___, r0.x \n"
"div_zeroop(fltmax) r200, r200.x, l5.x \n"
"call 10 \n"
"and r85.x___, vTidInGrpFlat0.x, l3.x \n"
"and r85._y__, vTidInGrpFlat0.x, l3.y \n"
"ishl r85._y__, r85.y, l0.y \n"
"call 81 \n"
"call 91 \n"
"call 82 \n"
"call 92 \n"
"call 83 \n"
"call 93 \n"
"call 84 \n"
"call 94 \n"
"call 7 \n"
"call 13 \n"
"endmain \n"
"func 11 \n"
"iadd r80.x___, r90.x, vTidInGrpFlat0.x \n"
"iadd r80._y__, r80.x, l30.w \n"
"iadd r80.__z_, r80.y, l30.w \n"
"iadd r80.___w, r80.z, l30.w \n"
"mov r400, g[r80.x+0] \n"
"mov r401, g[r80.x+64] \n"
"mov r402, g[r80.x+128] \n"
"mov r403, g[r80.x+192] \n"
"mov r404, g[r80.x+256] \n"
"mov r405, g[r80.x+320] \n"
"mov r406, g[r80.x+384] \n"
"mov r407, g[r80.x+448] \n"
"mov r408, g[r80.y+0] \n"
"mov r409, g[r80.y+64] \n"
"mov r410, g[r80.y+128] \n"
"mov r411, g[r80.y+192] \n"
"mov r412, g[r80.y+256] \n"
"mov r413, g[r80.y+320] \n"
"mov r414, g[r80.y+384] \n"
"mov r415, g[r80.y+448] \n"
"mov r416, g[r80.z+0] \n"
"mov r417, g[r80.z+64] \n"
"mov r418, g[r80.z+128] \n"
"mov r419, g[r80.z+192] \n"
"mov r420, g[r80.z+256] \n"
"mov r421, g[r80.z+320] \n"
"mov r422, g[r80.z+384] \n"
"mov r423, g[r80.z+448] \n"
"mov r424, g[r80.w+0] \n"
"mov r425, g[r80.w+64] \n"
"mov r426, g[r80.w+128] \n"
"mov r427, g[r80.w+192] \n"
"mov r428, g[r80.w+256] \n"
"mov r429, g[r80.w+320] \n"
"mov r430, g[r80.w+384] \n"
"mov r431, g[r80.w+448] \n"
"ret \n"
"endfunc \n"
"func 13 \n"
"ishl r80.x___, vTidInGrpFlat0.x, l30.y \n"
"iadd r80.x___, r90.x, r80.x \n"
"mov g[r80.x+0], r400 \n"
"mov g[r80.x+1], r408 \n"
"mov g[r80.x+2], r404 \n"
"mov g[r80.x+3], r412 \n"
"mov g[r80.x+4], r402 \n"
"mov g[r80.x+5], r410 \n"
"mov g[r80.x+6], r406 \n"
"mov g[r80.x+7], r414 \n"
"mov g[r80.x+8], r401 \n"
"mov g[r80.x+9], r409 \n"
"mov g[r80.x+10], r405 \n"
"mov g[r80.x+11], r413 \n"
"mov g[r80.x+12], r403 \n"
"mov g[r80.x+13], r411 \n"
"mov g[r80.x+14], r407 \n"
"mov g[r80.x+15], r415 \n"
"mov g[r80.x+16], r416 \n"
"mov g[r80.x+17], r424 \n"
"mov g[r80.x+18], r420 \n"
"mov g[r80.x+19], r428 \n"
"mov g[r80.x+20], r418 \n"
"mov g[r80.x+21], r426 \n"
"mov g[r80.x+22], r422 \n"
"mov g[r80.x+23], r430 \n"
"mov g[r80.x+24], r417 \n"
"mov g[r80.x+25], r425 \n"
"mov g[r80.x+26], r421 \n"
"mov g[r80.x+27], r429 \n"
"mov g[r80.x+28], r419 \n"
"mov g[r80.x+29], r427 \n"
"mov g[r80.x+30], r423 \n"
"mov g[r80.x+31], r431 \n"
"ret \n"
"endfunc \n"
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
";FFT2 \n"
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
";FFT4 \n"
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
";FFT8 \n"
"func 4 \n"
"mov r50, r70 \n"
"mov r51, r74 \n"
"call 2 \n"
"mov r70, r50 \n"
"mov r74, r51 \n"
"mov r50, r71 \n"
"mov r51, r75 \n"
"call 2 \n"
"mov r71, r50 \n"
"mov r75, r51 \n"
"mov r50, r72 \n"
"mov r51, r76 \n"
"call 2 \n"
"mov r72, r50 \n"
"mov r76, r51 \n"
"mov r50, r73 \n"
"mov r51, r77 \n"
"call 2 \n"
"mov r73, r50 \n"
"mov r77, r51 \n"
"mov r40, r75 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r75, r40 \n"
"mul_ieee r75, r75, l1.w \n"
"mov r40, r76 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r76, r40 \n"
"mov r40, r77 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r77, r40 \n"
"mul_ieee r77, r77, l1.w \n"
"mov r60, r70 \n"
"mov r61, r71 \n"
"mov r62, r72 \n"
"mov r63, r73 \n"
"call 3 \n"
"mov r70, r60 \n"
"mov r71, r61 \n"
"mov r72, r62 \n"
"mov r73, r63 \n"
"mov r60, r74 \n"
"mov r61, r75 \n"
"mov r62, r76 \n"
"mov r63, r77 \n"
"call 3 \n"
"mov r74, r60 \n"
"mov r75, r61 \n"
"mov r76, r62 \n"
"mov r77, r63 \n"
"ret \n"
"endfunc \n"
";FFT16 \n"
"func 5 \n"
"mov r60, r700 \n"
"mov r61, r704 \n"
"mov r62, r708 \n"
"mov r63, r712 \n"
"call 3 \n"
"mov r700, r60 \n"
"mov r704, r61 \n"
"mov r708, r62 \n"
"mov r712, r63 \n"
"mov r60, r701 \n"
"mov r61, r705 \n"
"mov r62, r709 \n"
"mov r63, r713 \n"
"call 3 \n"
"mov r701, r60 \n"
"mov r705, r61 \n"
"mov r709, r62 \n"
"mov r713, r63 \n"
"mov r60, r702 \n"
"mov r61, r706 \n"
"mov r62, r710 \n"
"mov r63, r714 \n"
"call 3 \n"
"mov r702, r60 \n"
"mov r706, r61 \n"
"mov r710, r62 \n"
"mov r714, r63 \n"
"mov r60, r703 \n"
"mov r61, r707 \n"
"mov r62, r711 \n"
"mov r63, r715 \n"
"call 3 \n"
"mov r703, r60 \n"
"mov r707, r61 \n"
"mov r711, r62 \n"
"mov r715, r63 \n"
"mov r40, r705 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r705, r40 \n"
"mul_ieee r705, r705, l1.w \n"
"mov r40, r706 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r706, r40 \n"
"mov r40, r707 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r707, r40 \n"
"mul_ieee r707, r707, l1.w \n"
"mov r40, r709 \n"
"mov r41, l2.yzyz \n"
"call 0 \n"
"mov r709, r40 \n"
"mov r40, r710 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r710, r40 \n"
"mul_ieee r710, r710, l1.w \n"
"mov r40, r711 \n"
"mov r41, l2.xwxw \n"
"call 0 \n"
"mov r711, r40 \n"
"mov r40, r713 \n"
"mov r41, l2.xwxw \n"
"call 0 \n"
"mov r713, r40 \n"
"mov r40, r714 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r714, r40 \n"
"mul_ieee r714, r714, l1.w \n"
"mov r40, r715 \n"
"mov r41, l2.wxwx \n"
"call 0 \n"
"mov r715, r40 \n"
"mov r60, r700 \n"
"mov r61, r701 \n"
"mov r62, r702 \n"
"mov r63, r703 \n"
"call 3 \n"
"mov r700, r60 \n"
"mov r701, r61 \n"
"mov r702, r62 \n"
"mov r703, r63 \n"
"mov r60, r704 \n"
"mov r61, r705 \n"
"mov r62, r706 \n"
"mov r63, r707 \n"
"call 3 \n"
"mov r704, r60 \n"
"mov r705, r61 \n"
"mov r706, r62 \n"
"mov r707, r63 \n"
"mov r60, r708 \n"
"mov r61, r709 \n"
"mov r62, r710 \n"
"mov r63, r711 \n"
"call 3 \n"
"mov r708, r60 \n"
"mov r709, r61 \n"
"mov r710, r62 \n"
"mov r711, r63 \n"
"mov r60, r712 \n"
"mov r61, r713 \n"
"mov r62, r714 \n"
"mov r63, r715 \n"
"call 3 \n"
"mov r712, r60 \n"
"mov r713, r61 \n"
"mov r714, r62 \n"
"mov r715, r63 \n"
"ret \n"
"endfunc \n"
";FFT32 \n"
"func 6 \n"
"mov r60, r400 \n"
"mov r61, r408 \n"
"mov r62, r416 \n"
"mov r63, r424 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r408, r61 \n"
"mov r416, r62 \n"
"mov r424, r63 \n"
"mov r60, r401 \n"
"mov r61, r409 \n"
"mov r62, r417 \n"
"mov r63, r425 \n"
"call 3 \n"
"mov r401, r60 \n"
"mov r409, r61 \n"
"mov r417, r62 \n"
"mov r425, r63 \n"
"mov r60, r402 \n"
"mov r61, r410 \n"
"mov r62, r418 \n"
"mov r63, r426 \n"
"call 3 \n"
"mov r402, r60 \n"
"mov r410, r61 \n"
"mov r418, r62 \n"
"mov r426, r63 \n"
"mov r60, r403 \n"
"mov r61, r411 \n"
"mov r62, r419 \n"
"mov r63, r427 \n"
"call 3 \n"
"mov r403, r60 \n"
"mov r411, r61 \n"
"mov r419, r62 \n"
"mov r427, r63 \n"
"mov r60, r404 \n"
"mov r61, r412 \n"
"mov r62, r420 \n"
"mov r63, r428 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r412, r61 \n"
"mov r420, r62 \n"
"mov r428, r63 \n"
"mov r60, r405 \n"
"mov r61, r413 \n"
"mov r62, r421 \n"
"mov r63, r429 \n"
"call 3 \n"
"mov r405, r60 \n"
"mov r413, r61 \n"
"mov r421, r62 \n"
"mov r429, r63 \n"
"mov r60, r406 \n"
"mov r61, r414 \n"
"mov r62, r422 \n"
"mov r63, r430 \n"
"call 3 \n"
"mov r406, r60 \n"
"mov r414, r61 \n"
"mov r422, r62 \n"
"mov r430, r63 \n"
"mov r60, r407 \n"
"mov r61, r415 \n"
"mov r62, r423 \n"
"mov r63, r431 \n"
"call 3 \n"
"mov r407, r60 \n"
"mov r415, r61 \n"
"mov r423, r62 \n"
"mov r431, r63 \n"
"mov r40, r409 \n"
"mov r41, l10.xyxy \n"
"call 0 \n"
"mov r409, r40 \n"
"mov r40, r410 \n"
"mov r41, l10.zwzw \n"
"call 0 \n"
"mov r410, r40 \n"
"mov r40, r411 \n"
"mov r41, l11.xyxy \n"
"call 0 \n"
"mov r411, r40 \n"
"mov r40, r412 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r412, r40 \n"
"mov r40, r413 \n"
"mov r41, l11.zwzw \n"
"call 0 \n"
"mov r413, r40 \n"
"mov r40, r414 \n"
"mov r41, l12.xyxy \n"
"call 0 \n"
"mov r414, r40 \n"
"mov r40, r415 \n"
"mov r41, l12.zwzw \n"
"call 0 \n"
"mov r415, r40 \n"
"mov r40, r417 \n"
"mov r41, l13.xyxy \n"
"call 0 \n"
"mov r417, r40 \n"
"mov r40, r418 \n"
"mov r41, l13.zwzw \n"
"call 0 \n"
"mov r418, r40 \n"
"mov r40, r419 \n"
"mov r41, l14.xyxy \n"
"call 0 \n"
"mov r419, r40 \n"
"mov r40, r420 \n"
"mov r41, l14.zwzw \n"
"call 0 \n"
"mov r420, r40 \n"
"mov r40, r421 \n"
"mov r41, l15.xyxy \n"
"call 0 \n"
"mov r421, r40 \n"
"mov r40, r422 \n"
"mov r41, l15.zwzw \n"
"call 0 \n"
"mov r422, r40 \n"
"mov r40, r423 \n"
"mov r41, l16.xyxy \n"
"call 0 \n"
"mov r423, r40 \n"
"mov r40, r425 \n"
"mov r41, l16.zwzw \n"
"call 0 \n"
"mov r425, r40 \n"
"mov r40, r426 \n"
"mov r41, l17.xyxy \n"
"call 0 \n"
"mov r426, r40 \n"
"mov r40, r427 \n"
"mov r41, l17.zwzw \n"
"call 0 \n"
"mov r427, r40 \n"
"mov r40, r428 \n"
"mov r41, l18.xyxy \n"
"call 0 \n"
"mov r428, r40 \n"
"mov r40, r429 \n"
"mov r41, l18.zwzw \n"
"call 0 \n"
"mov r429, r40 \n"
"mov r40, r430 \n"
"mov r41, l19.xyxy \n"
"call 0 \n"
"mov r430, r40 \n"
"mov r40, r431 \n"
"mov r41, l19.zwzw \n"
"call 0 \n"
"mov r431, r40 \n"
"mov r70, r400 \n"
"mov r71, r401 \n"
"mov r72, r402 \n"
"mov r73, r403 \n"
"mov r74, r404 \n"
"mov r75, r405 \n"
"mov r76, r406 \n"
"mov r77, r407 \n"
"call 4 \n"
"mov r400, r70 \n"
"mov r401, r71 \n"
"mov r402, r72 \n"
"mov r403, r73 \n"
"mov r404, r74 \n"
"mov r405, r75 \n"
"mov r406, r76 \n"
"mov r407, r77 \n"
"mov r70, r408 \n"
"mov r71, r409 \n"
"mov r72, r410 \n"
"mov r73, r411 \n"
"mov r74, r412 \n"
"mov r75, r413 \n"
"mov r76, r414 \n"
"mov r77, r415 \n"
"call 4 \n"
"mov r408, r70 \n"
"mov r409, r71 \n"
"mov r410, r72 \n"
"mov r411, r73 \n"
"mov r412, r74 \n"
"mov r413, r75 \n"
"mov r414, r76 \n"
"mov r415, r77 \n"
"mov r70, r416 \n"
"mov r71, r417 \n"
"mov r72, r418 \n"
"mov r73, r419 \n"
"mov r74, r420 \n"
"mov r75, r421 \n"
"mov r76, r422 \n"
"mov r77, r423 \n"
"call 4 \n"
"mov r416, r70 \n"
"mov r417, r71 \n"
"mov r418, r72 \n"
"mov r419, r73 \n"
"mov r420, r74 \n"
"mov r421, r75 \n"
"mov r422, r76 \n"
"mov r423, r77 \n"
"mov r70, r424 \n"
"mov r71, r425 \n"
"mov r72, r426 \n"
"mov r73, r427 \n"
"mov r74, r428 \n"
"mov r75, r429 \n"
"mov r76, r430 \n"
"mov r77, r431 \n"
"call 4 \n"
"mov r424, r70 \n"
"mov r425, r71 \n"
"mov r426, r72 \n"
"mov r427, r73 \n"
"mov r428, r74 \n"
"mov r429, r75 \n"
"mov r430, r76 \n"
"mov r431, r77 \n"
"ret \n"
"endfunc \n"
/*
 * IL func 7: calls func 5 twice, once for each half of r400-r431.
 * Each pass copies 16 registers into r700-r715, calls func 5, and then
 * copies r700-r715 back over the same 16 registers.
 * NOTE(review): func 5 of this kernel is defined outside this excerpt;
 * which registers it actually reads and writes must be confirmed there.
 */
"func 7 \n"
/* Pass 1: r400-r415 -> r700-r715. */
"mov r700, r400 \n"
"mov r701, r401 \n"
"mov r702, r402 \n"
"mov r703, r403 \n"
"mov r704, r404 \n"
"mov r705, r405 \n"
"mov r706, r406 \n"
"mov r707, r407 \n"
"mov r708, r408 \n"
"mov r709, r409 \n"
"mov r710, r410 \n"
"mov r711, r411 \n"
"mov r712, r412 \n"
"mov r713, r413 \n"
"mov r714, r414 \n"
"mov r715, r415 \n"
"call 5 \n"
/* Copy r700-r715 back into r400-r415. */
"mov r400, r700 \n"
"mov r401, r701 \n"
"mov r402, r702 \n"
"mov r403, r703 \n"
"mov r404, r704 \n"
"mov r405, r705 \n"
"mov r406, r706 \n"
"mov r407, r707 \n"
"mov r408, r708 \n"
"mov r409, r709 \n"
"mov r410, r710 \n"
"mov r411, r711 \n"
"mov r412, r712 \n"
"mov r413, r713 \n"
"mov r414, r714 \n"
"mov r415, r715 \n"
/* Pass 2: r416-r431 -> r700-r715. */
"mov r700, r416 \n"
"mov r701, r417 \n"
"mov r702, r418 \n"
"mov r703, r419 \n"
"mov r704, r420 \n"
"mov r705, r421 \n"
"mov r706, r422 \n"
"mov r707, r423 \n"
"mov r708, r424 \n"
"mov r709, r425 \n"
"mov r710, r426 \n"
"mov r711, r427 \n"
"mov r712, r428 \n"
"mov r713, r429 \n"
"mov r714, r430 \n"
"mov r715, r431 \n"
"call 5 \n"
/* Copy r700-r715 back into r416-r431. */
"mov r416, r700 \n"
"mov r417, r701 \n"
"mov r418, r702 \n"
"mov r419, r703 \n"
"mov r420, r704 \n"
"mov r421, r705 \n"
"mov r422, r706 \n"
"mov r423, r707 \n"
"mov r424, r708 \n"
"mov r425, r709 \n"
"mov r426, r710 \n"
"mov r427, r711 \n"
"mov r428, r712 \n"
"mov r429, r713 \n"
"mov r430, r714 \n"
"mov r431, r715 \n"
"ret \n"
"endfunc \n"
/*
 * IL func 81: gathers the .x component of r400-r431 into the eight vectors
 * r500-r507 (four source registers per vector, in the permuted order spelled
 * out below), writes them to LDS mem0 at local offsets 0, 4, ..., 28 and
 * then issues fence_lds.
 * Funcs 82, 83 and 84 are the same routine for the .y, .z and .w components.
 */
"func 81 \n"
"mov r500.x___, r400.x \n"
"mov r500._y__, r408.x \n"
"mov r500.__z_, r404.x \n"
"mov r500.___w, r412.x \n"
"mov r501.x___, r416.x \n"
"mov r501._y__, r424.x \n"
"mov r501.__z_, r420.x \n"
"mov r501.___w, r428.x \n"
"mov r502.x___, r402.x \n"
"mov r502._y__, r410.x \n"
"mov r502.__z_, r406.x \n"
"mov r502.___w, r414.x \n"
"mov r503.x___, r418.x \n"
"mov r503._y__, r426.x \n"
"mov r503.__z_, r422.x \n"
"mov r503.___w, r430.x \n"
"mov r504.x___, r401.x \n"
"mov r504._y__, r409.x \n"
"mov r504.__z_, r405.x \n"
"mov r504.___w, r413.x \n"
"mov r505.x___, r417.x \n"
"mov r505._y__, r425.x \n"
"mov r505.__z_, r421.x \n"
"mov r505.___w, r429.x \n"
"mov r506.x___, r403.x \n"
"mov r506._y__, r411.x \n"
"mov r506.__z_, r407.x \n"
"mov r506.___w, r415.x \n"
"mov r507.x___, r419.x \n"
"mov r507._y__, r427.x \n"
"mov r507.__z_, r423.x \n"
"mov r507.___w, r431.x \n"
/* Store the eight packed vectors to LDS, then fence. */
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"lds_write_vec_lOffset(16) mem0, r504 \n"
"lds_write_vec_lOffset(20) mem0, r505 \n"
"lds_write_vec_lOffset(24) mem0, r506 \n"
"lds_write_vec_lOffset(28) mem0, r507 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
/*
 * IL func 91: reads eight vectors r500-r507 from LDS with
 * lds_read_vec_neighborExch and scatters them, in sequential order, into the
 * .x component of r400-r431 (r500.xyzw -> r400.x..r403.x, and so on).
 * The address operand is r95.xyyy; r95 is a scratch copy of r85.xyxy, so r85
 * itself is left unmodified. r95.x is advanced by l24.x between reads; for
 * the second group of four reads r95 is reloaded from r85 and r95.y is
 * advanced by l24.x once.
 * NOTE(review): l24 and the setup of r85 for this kernel lie outside this
 * excerpt - confirm their values there.
 */
"func 91 \n"
/* First four reads: base address from r85, stepping r95.x. */
"mov r95, r85.xyxy \n"
"lds_read_vec_neighborExch r500, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r501, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r502, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r503, r95.xyyy \n"
/* Last four reads: restart from r85 with r95.y bumped by l24.x. */
"mov r95, r85.xyxy \n"
"iadd r95._y__, r95.y, l24.x \n"
"lds_read_vec_neighborExch r504, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r505, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r506, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r507, r95.xyyy \n"
/* Unpack into the .x lane of r400-r431. */
"mov r400.x___, r500.x \n"
"mov r401.x___, r500.y \n"
"mov r402.x___, r500.z \n"
"mov r403.x___, r500.w \n"
"mov r404.x___, r501.x \n"
"mov r405.x___, r501.y \n"
"mov r406.x___, r501.z \n"
"mov r407.x___, r501.w \n"
"mov r408.x___, r502.x \n"
"mov r409.x___, r502.y \n"
"mov r410.x___, r502.z \n"
"mov r411.x___, r502.w \n"
"mov r412.x___, r503.x \n"
"mov r413.x___, r503.y \n"
"mov r414.x___, r503.z \n"
"mov r415.x___, r503.w \n"
"mov r416.x___, r504.x \n"
"mov r417.x___, r504.y \n"
"mov r418.x___, r504.z \n"
"mov r419.x___, r504.w \n"
"mov r420.x___, r505.x \n"
"mov r421.x___, r505.y \n"
"mov r422.x___, r505.z \n"
"mov r423.x___, r505.w \n"
"mov r424.x___, r506.x \n"
"mov r425.x___, r506.y \n"
"mov r426.x___, r506.z \n"
"mov r427.x___, r506.w \n"
"mov r428.x___, r507.x \n"
"mov r429.x___, r507.y \n"
"mov r430.x___, r507.z \n"
"mov r431.x___, r507.w \n"
"ret \n"
"endfunc \n"
/*
 * IL func 82: gathers the .y component of r400-r431 into r500-r507 (four
 * source registers per vector, in the permuted order spelled out below),
 * writes the eight vectors to LDS mem0 at local offsets 0, 4, ..., 28 and
 * then issues fence_lds. Same register order as func 81, which handles .x.
 */
"func 82 \n"
"mov r500.x___, r400.y \n"
"mov r500._y__, r408.y \n"
"mov r500.__z_, r404.y \n"
"mov r500.___w, r412.y \n"
"mov r501.x___, r416.y \n"
"mov r501._y__, r424.y \n"
"mov r501.__z_, r420.y \n"
"mov r501.___w, r428.y \n"
"mov r502.x___, r402.y \n"
"mov r502._y__, r410.y \n"
"mov r502.__z_, r406.y \n"
"mov r502.___w, r414.y \n"
"mov r503.x___, r418.y \n"
"mov r503._y__, r426.y \n"
"mov r503.__z_, r422.y \n"
"mov r503.___w, r430.y \n"
"mov r504.x___, r401.y \n"
"mov r504._y__, r409.y \n"
"mov r504.__z_, r405.y \n"
"mov r504.___w, r413.y \n"
"mov r505.x___, r417.y \n"
"mov r505._y__, r425.y \n"
"mov r505.__z_, r421.y \n"
"mov r505.___w, r429.y \n"
"mov r506.x___, r403.y \n"
"mov r506._y__, r411.y \n"
"mov r506.__z_, r407.y \n"
"mov r506.___w, r415.y \n"
"mov r507.x___, r419.y \n"
"mov r507._y__, r427.y \n"
"mov r507.__z_, r423.y \n"
"mov r507.___w, r431.y \n"
/* Store the eight packed vectors to LDS, then fence. */
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"lds_write_vec_lOffset(16) mem0, r504 \n"
"lds_write_vec_lOffset(20) mem0, r505 \n"
"lds_write_vec_lOffset(24) mem0, r506 \n"
"lds_write_vec_lOffset(28) mem0, r507 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
/*
 * IL func 92: reads eight vectors r500-r507 from LDS with
 * lds_read_vec_neighborExch and scatters them, in sequential order, into the
 * .y component of r400-r431. Addressing is identical to func 91: r95 is a
 * scratch copy of r85.xyxy, r95.x is stepped by l24.x between reads, and the
 * second group of four reads restarts from r85 with r95.y bumped by l24.x.
 * NOTE(review): l24 and the setup of r85 for this kernel lie outside this
 * excerpt - confirm their values there.
 */
"func 92 \n"
"mov r95, r85.xyxy \n"
"lds_read_vec_neighborExch r500, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r501, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r502, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r503, r95.xyyy \n"
/* Second group of four reads: restart from r85, advance r95.y once. */
"mov r95, r85.xyxy \n"
"iadd r95._y__, r95.y, l24.x \n"
"lds_read_vec_neighborExch r504, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r505, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r506, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r507, r95.xyyy \n"
/* Unpack into the .y lane of r400-r431. */
"mov r400._y__, r500.x \n"
"mov r401._y__, r500.y \n"
"mov r402._y__, r500.z \n"
"mov r403._y__, r500.w \n"
"mov r404._y__, r501.x \n"
"mov r405._y__, r501.y \n"
"mov r406._y__, r501.z \n"
"mov r407._y__, r501.w \n"
"mov r408._y__, r502.x \n"
"mov r409._y__, r502.y \n"
"mov r410._y__, r502.z \n"
"mov r411._y__, r502.w \n"
"mov r412._y__, r503.x \n"
"mov r413._y__, r503.y \n"
"mov r414._y__, r503.z \n"
"mov r415._y__, r503.w \n"
"mov r416._y__, r504.x \n"
"mov r417._y__, r504.y \n"
"mov r418._y__, r504.z \n"
"mov r419._y__, r504.w \n"
"mov r420._y__, r505.x \n"
"mov r421._y__, r505.y \n"
"mov r422._y__, r505.z \n"
"mov r423._y__, r505.w \n"
"mov r424._y__, r506.x \n"
"mov r425._y__, r506.y \n"
"mov r426._y__, r506.z \n"
"mov r427._y__, r506.w \n"
"mov r428._y__, r507.x \n"
"mov r429._y__, r507.y \n"
"mov r430._y__, r507.z \n"
"mov r431._y__, r507.w \n"
"ret \n"
"endfunc \n"
/*
 * IL func 83: gathers the .z component of r400-r431 into r500-r507 (four
 * source registers per vector, in the permuted order spelled out below),
 * writes the eight vectors to LDS mem0 at local offsets 0, 4, ..., 28 and
 * then issues fence_lds. Same register order as funcs 81 and 82.
 */
"func 83 \n"
"mov r500.x___, r400.z \n"
"mov r500._y__, r408.z \n"
"mov r500.__z_, r404.z \n"
"mov r500.___w, r412.z \n"
"mov r501.x___, r416.z \n"
"mov r501._y__, r424.z \n"
"mov r501.__z_, r420.z \n"
"mov r501.___w, r428.z \n"
"mov r502.x___, r402.z \n"
"mov r502._y__, r410.z \n"
"mov r502.__z_, r406.z \n"
"mov r502.___w, r414.z \n"
"mov r503.x___, r418.z \n"
"mov r503._y__, r426.z \n"
"mov r503.__z_, r422.z \n"
"mov r503.___w, r430.z \n"
"mov r504.x___, r401.z \n"
"mov r504._y__, r409.z \n"
"mov r504.__z_, r405.z \n"
"mov r504.___w, r413.z \n"
"mov r505.x___, r417.z \n"
"mov r505._y__, r425.z \n"
"mov r505.__z_, r421.z \n"
"mov r505.___w, r429.z \n"
"mov r506.x___, r403.z \n"
"mov r506._y__, r411.z \n"
"mov r506.__z_, r407.z \n"
"mov r506.___w, r415.z \n"
"mov r507.x___, r419.z \n"
"mov r507._y__, r427.z \n"
"mov r507.__z_, r423.z \n"
"mov r507.___w, r431.z \n"
/* Store the eight packed vectors to LDS, then fence. */
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"lds_write_vec_lOffset(16) mem0, r504 \n"
"lds_write_vec_lOffset(20) mem0, r505 \n"
"lds_write_vec_lOffset(24) mem0, r506 \n"
"lds_write_vec_lOffset(28) mem0, r507 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
/*
 * IL func 93: reads eight vectors r500-r507 from LDS with
 * lds_read_vec_neighborExch and scatters them, in sequential order, into the
 * .z component of r400-r431. Addressing is identical to funcs 91 and 92:
 * r95 is a scratch copy of r85.xyxy, r95.x is stepped by l24.x between
 * reads, and the second group of four reads restarts from r85 with r95.y
 * bumped by l24.x.
 * NOTE(review): l24 and the setup of r85 for this kernel lie outside this
 * excerpt - confirm their values there.
 */
"func 93 \n"
"mov r95, r85.xyxy \n"
"lds_read_vec_neighborExch r500, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r501, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r502, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r503, r95.xyyy \n"
/* Second group of four reads: restart from r85, advance r95.y once. */
"mov r95, r85.xyxy \n"
"iadd r95._y__, r95.y, l24.x \n"
"lds_read_vec_neighborExch r504, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r505, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r506, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r507, r95.xyyy \n"
/* Unpack into the .z lane of r400-r431. */
"mov r400.__z_, r500.x \n"
"mov r401.__z_, r500.y \n"
"mov r402.__z_, r500.z \n"
"mov r403.__z_, r500.w \n"
"mov r404.__z_, r501.x \n"
"mov r405.__z_, r501.y \n"
"mov r406.__z_, r501.z \n"
"mov r407.__z_, r501.w \n"
"mov r408.__z_, r502.x \n"
"mov r409.__z_, r502.y \n"
"mov r410.__z_, r502.z \n"
"mov r411.__z_, r502.w \n"
"mov r412.__z_, r503.x \n"
"mov r413.__z_, r503.y \n"
"mov r414.__z_, r503.z \n"
"mov r415.__z_, r503.w \n"
"mov r416.__z_, r504.x \n"
"mov r417.__z_, r504.y \n"
"mov r418.__z_, r504.z \n"
"mov r419.__z_, r504.w \n"
"mov r420.__z_, r505.x \n"
"mov r421.__z_, r505.y \n"
"mov r422.__z_, r505.z \n"
"mov r423.__z_, r505.w \n"
"mov r424.__z_, r506.x \n"
"mov r425.__z_, r506.y \n"
"mov r426.__z_, r506.z \n"
"mov r427.__z_, r506.w \n"
"mov r428.__z_, r507.x \n"
"mov r429.__z_, r507.y \n"
"mov r430.__z_, r507.z \n"
"mov r431.__z_, r507.w \n"
"ret \n"
"endfunc \n"
/*
 * IL func 84: gathers the .w component of r400-r431 into r500-r507 (four
 * source registers per vector, in the permuted order spelled out below),
 * writes the eight vectors to LDS mem0 at local offsets 0, 4, ..., 28 and
 * then issues fence_lds. Same register order as funcs 81, 82 and 83.
 */
"func 84 \n"
"mov r500.x___, r400.w \n"
"mov r500._y__, r408.w \n"
"mov r500.__z_, r404.w \n"
"mov r500.___w, r412.w \n"
"mov r501.x___, r416.w \n"
"mov r501._y__, r424.w \n"
"mov r501.__z_, r420.w \n"
"mov r501.___w, r428.w \n"
"mov r502.x___, r402.w \n"
"mov r502._y__, r410.w \n"
"mov r502.__z_, r406.w \n"
"mov r502.___w, r414.w \n"
"mov r503.x___, r418.w \n"
"mov r503._y__, r426.w \n"
"mov r503.__z_, r422.w \n"
"mov r503.___w, r430.w \n"
"mov r504.x___, r401.w \n"
"mov r504._y__, r409.w \n"
"mov r504.__z_, r405.w \n"
"mov r504.___w, r413.w \n"
"mov r505.x___, r417.w \n"
"mov r505._y__, r425.w \n"
"mov r505.__z_, r421.w \n"
"mov r505.___w, r429.w \n"
"mov r506.x___, r403.w \n"
"mov r506._y__, r411.w \n"
"mov r506.__z_, r407.w \n"
"mov r506.___w, r415.w \n"
"mov r507.x___, r419.w \n"
"mov r507._y__, r427.w \n"
"mov r507.__z_, r423.w \n"
"mov r507.___w, r431.w \n"
/* Store the eight packed vectors to LDS, then fence. */
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"lds_write_vec_lOffset(16) mem0, r504 \n"
"lds_write_vec_lOffset(20) mem0, r505 \n"
"lds_write_vec_lOffset(24) mem0, r506 \n"
"lds_write_vec_lOffset(28) mem0, r507 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
/*
 * IL func 94: reads eight vectors r500-r507 from LDS with
 * lds_read_vec_neighborExch and scatters them, in sequential order, into the
 * .w component of r400-r431. Addressing is identical to funcs 91-93: r95 is
 * a scratch copy of r85.xyxy, r95.x is stepped by l24.x between reads, and
 * the second group of four reads restarts from r85 with r95.y bumped by
 * l24.x.
 * NOTE(review): l24 and the setup of r85 for this kernel lie outside this
 * excerpt - confirm their values there.
 */
"func 94 \n"
"mov r95, r85.xyxy \n"
"lds_read_vec_neighborExch r500, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r501, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r502, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r503, r95.xyyy \n"
/* Second group of four reads: restart from r85, advance r95.y once. */
"mov r95, r85.xyxy \n"
"iadd r95._y__, r95.y, l24.x \n"
"lds_read_vec_neighborExch r504, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r505, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r506, r95.xyyy \n"
"iadd r95.x___, r95.x, l24.x \n"
"lds_read_vec_neighborExch r507, r95.xyyy \n"
/* Unpack into the .w lane of r400-r431. */
"mov r400.___w, r500.x \n"
"mov r401.___w, r500.y \n"
"mov r402.___w, r500.z \n"
"mov r403.___w, r500.w \n"
"mov r404.___w, r501.x \n"
"mov r405.___w, r501.y \n"
"mov r406.___w, r501.z \n"
"mov r407.___w, r501.w \n"
"mov r408.___w, r502.x \n"
"mov r409.___w, r502.y \n"
"mov r410.___w, r502.z \n"
"mov r411.___w, r502.w \n"
"mov r412.___w, r503.x \n"
"mov r413.___w, r503.y \n"
"mov r414.___w, r503.z \n"
"mov r415.___w, r503.w \n"
"mov r416.___w, r504.x \n"
"mov r417.___w, r504.y \n"
"mov r418.___w, r504.z \n"
"mov r419.___w, r504.w \n"
"mov r420.___w, r505.x \n"
"mov r421.___w, r505.y \n"
"mov r422.___w, r505.z \n"
"mov r423.___w, r505.w \n"
"mov r424.___w, r506.x \n"
"mov r425.___w, r506.y \n"
"mov r426.___w, r506.z \n"
"mov r427.___w, r506.w \n"
"mov r428.___w, r507.x \n"
"mov r429.___w, r507.y \n"
"mov r430.___w, r507.z \n"
"mov r431.___w, r507.w \n"
"ret \n"
"endfunc \n"
/*
 * IL func 10: multiplies each of r401-r431 by a per-register (cos, sin)
 * factor using func 0; r400 is left untouched.
 *   1. r100-r107 = r200.x * l40..l47 (32 angle values, four per register).
 *   2. r110-r117 = cos of those angles, r120-r127 = sin of those angles.
 *      The .x lane of r110 / r120 is masked out because r400 gets no factor.
 *   3. For register r(400 + 4*j + c): r41.xz = r11j.c (cos) and
 *      r41.yw = r12j.c (sin), r40 = the register, call func 0, store r40 back.
 * Clobbers r40, r41, r100-r107, r110-r117, r120-r127 (plus whatever func 0
 * clobbers).
 * NOTE(review): l40-l47, func 0 and the setup of r200.x for this kernel are
 * outside this excerpt; func 0 is presumably the packed complex multiply
 * r40 *= r41 - confirm against its definition.
 */
"func 10 \n"
/* Angles: scale the eight literal vectors by r200.x. */
"mul_ieee r100, r200.x, l40 \n"
"mul_ieee r101, r200.x, l41 \n"
"mul_ieee r102, r200.x, l42 \n"
"mul_ieee r103, r200.x, l43 \n"
"mul_ieee r104, r200.x, l44 \n"
"mul_ieee r105, r200.x, l45 \n"
"mul_ieee r106, r200.x, l46 \n"
"mul_ieee r107, r200.x, l47 \n"
/* Cosines of all angles (lane x of the first vector is skipped). */
"cos_vec r110._yzw, r100 \n"
"cos_vec r111, r101 \n"
"cos_vec r112, r102 \n"
"cos_vec r113, r103 \n"
"cos_vec r114, r104 \n"
"cos_vec r115, r105 \n"
"cos_vec r116, r106 \n"
"cos_vec r117, r107 \n"
/* Sines of all angles (lane x of the first vector is skipped). */
"sin_vec r120._yzw, r100 \n"
"sin_vec r121, r101 \n"
"sin_vec r122, r102 \n"
"sin_vec r123, r103 \n"
"sin_vec r124, r104 \n"
"sin_vec r125, r105 \n"
"sin_vec r126, r106 \n"
"sin_vec r127, r107 \n"
/* Apply the factors: one func-0 call per register r401..r431. */
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
"mov r40, r404 \n"
"mov r41.x_z_, r111.x \n"
"mov r41._y_w, r121.x \n"
"call 0 \n"
"mov r404, r40 \n"
"mov r40, r405 \n"
"mov r41.x_z_, r111.y \n"
"mov r41._y_w, r121.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mov r40, r406 \n"
"mov r41.x_z_, r111.z \n"
"mov r41._y_w, r121.z \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41.x_z_, r111.w \n"
"mov r41._y_w, r121.w \n"
"call 0 \n"
"mov r407, r40 \n"
"mov r40, r408 \n"
"mov r41.x_z_, r112.x \n"
"mov r41._y_w, r122.x \n"
"call 0 \n"
"mov r408, r40 \n"
"mov r40, r409 \n"
"mov r41.x_z_, r112.y \n"
"mov r41._y_w, r122.y \n"
"call 0 \n"
"mov r409, r40 \n"
"mov r40, r410 \n"
"mov r41.x_z_, r112.z \n"
"mov r41._y_w, r122.z \n"
"call 0 \n"
"mov r410, r40 \n"
"mov r40, r411 \n"
"mov r41.x_z_, r112.w \n"
"mov r41._y_w, r122.w \n"
"call 0 \n"
"mov r411, r40 \n"
"mov r40, r412 \n"
"mov r41.x_z_, r113.x \n"
"mov r41._y_w, r123.x \n"
"call 0 \n"
"mov r412, r40 \n"
"mov r40, r413 \n"
"mov r41.x_z_, r113.y \n"
"mov r41._y_w, r123.y \n"
"call 0 \n"
"mov r413, r40 \n"
"mov r40, r414 \n"
"mov r41.x_z_, r113.z \n"
"mov r41._y_w, r123.z \n"
"call 0 \n"
"mov r414, r40 \n"
"mov r40, r415 \n"
"mov r41.x_z_, r113.w \n"
"mov r41._y_w, r123.w \n"
"call 0 \n"
"mov r415, r40 \n"
"mov r40, r416 \n"
"mov r41.x_z_, r114.x \n"
"mov r41._y_w, r124.x \n"
"call 0 \n"
"mov r416, r40 \n"
"mov r40, r417 \n"
"mov r41.x_z_, r114.y \n"
"mov r41._y_w, r124.y \n"
"call 0 \n"
"mov r417, r40 \n"
"mov r40, r418 \n"
"mov r41.x_z_, r114.z \n"
"mov r41._y_w, r124.z \n"
"call 0 \n"
"mov r418, r40 \n"
"mov r40, r419 \n"
"mov r41.x_z_, r114.w \n"
"mov r41._y_w, r124.w \n"
"call 0 \n"
"mov r419, r40 \n"
"mov r40, r420 \n"
"mov r41.x_z_, r115.x \n"
"mov r41._y_w, r125.x \n"
"call 0 \n"
"mov r420, r40 \n"
"mov r40, r421 \n"
"mov r41.x_z_, r115.y \n"
"mov r41._y_w, r125.y \n"
"call 0 \n"
"mov r421, r40 \n"
"mov r40, r422 \n"
"mov r41.x_z_, r115.z \n"
"mov r41._y_w, r125.z \n"
"call 0 \n"
"mov r422, r40 \n"
"mov r40, r423 \n"
"mov r41.x_z_, r115.w \n"
"mov r41._y_w, r125.w \n"
"call 0 \n"
"mov r423, r40 \n"
"mov r40, r424 \n"
"mov r41.x_z_, r116.x \n"
"mov r41._y_w, r126.x \n"
"call 0 \n"
"mov r424, r40 \n"
"mov r40, r425 \n"
"mov r41.x_z_, r116.y \n"
"mov r41._y_w, r126.y \n"
"call 0 \n"
"mov r425, r40 \n"
"mov r40, r426 \n"
"mov r41.x_z_, r116.z \n"
"mov r41._y_w, r126.z \n"
"call 0 \n"
"mov r426, r40 \n"
"mov r40, r427 \n"
"mov r41.x_z_, r116.w \n"
"mov r41._y_w, r126.w \n"
"call 0 \n"
"mov r427, r40 \n"
"mov r40, r428 \n"
"mov r41.x_z_, r117.x \n"
"mov r41._y_w, r127.x \n"
"call 0 \n"
"mov r428, r40 \n"
"mov r40, r429 \n"
"mov r41.x_z_, r117.y \n"
"mov r41._y_w, r127.y \n"
"call 0 \n"
"mov r429, r40 \n"
"mov r40, r430 \n"
"mov r41.x_z_, r117.z \n"
"mov r41._y_w, r127.z \n"
"call 0 \n"
"mov r430, r40 \n"
"mov r40, r431 \n"
"mov r41.x_z_, r117.w \n"
"mov r41._y_w, r127.w \n"
"call 0 \n"
"mov r431, r40 \n"
"ret \n"
"endfunc \n"
"end \n";

/*
 * Text of an IL compute kernel ("il_cs_2_0"), stored as one concatenated
 * string. NOTE(review): judging by the identifier and the 1024.0f literal in
 * l5 this is presumably the 1024-point FFT kernel - confirm with the code
 * that compiles this string.
 *
 * This first part holds the kernel declarations: 64 threads per group, 32
 * LDS units per thread in _wavefrontRel sharing mode, and the literal
 * constants. The "; lN = (...)" lines embedded in the string print each
 * literal's bit pattern as floats; for integer literals (masks, shift
 * counts) that rendering is meaningless and the hex values are what matter.
 *
 * Literals as used by the code visible after the declarations:
 *   l1        0, 1, -1, 0.7071067691 - building blocks for the fixed
 *             factors in funcs 3, 4 and 5.
 *   l3        masks 0xFFFFFFE3 / 0x0000001C applied to the thread id to form
 *             the LDS read address in r75.
 *   l5.x      1024.0f, divisor applied to (thread id & 31) in the main body.
 *   l10-l19   (cos, -sin)-style pairs consumed by func 5 through func 0.
 *   l24.x     4, the address step between LDS reads in funcs 91 / 92.
 *   l25.w     0x1F mask applied to the thread id.
 *   l30       x = 11 and y = 5 are shift counts, z = 0x200 is the address
 *             step used by func 6.
 *   l40-l47   negative integer multiples of 2*pi (l44.x is -2*pi). Note that
 *             l40.x is 0x80000000, i.e. -0.0f, although the embedded comment
 *             prints it as 0.0f. NOTE(review): not referenced in the visible
 *             part of this kernel; presumably consumed by its func 10.
 */
const char _fft1024_tomo_fft_source_[] = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
"dcl_lds_size_per_thread 32 \n"
"dcl_lds_sharing_mode _wavefrontRel \n"
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.242077543e-44f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000010 \n"
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l3 = (-1.#QNANf, 3.923635700e-44f, 5.605193857e-45f, 1.121038771e-44f, ) \n"
"dcl_literal l3, 0xFFFFFFE3, 0x0000001C, 0x00000004, 0x00000008 \n"
"; l5 = (1024.0f, 0.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l5, 0x44800000, 0x00000000, 0x00000000, 0x00000000 \n"
"; l10 = (0.9238795042f, -0.3826834261f, 0.7071067691f, -0.7071067691f, ) \n"
"dcl_literal l10, 0x3F6C835E, 0xBEC3EF15, 0x3F3504F3, 0xBF3504F3 \n"
"; l11 = (0.3826834261f, -0.9238795042f, -0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l11, 0x3EC3EF15, 0xBF6C835E, 0xBEC3EF15, 0xBF6C835E \n"
"; l12 = (-0.7071067691f, -0.7071067691f, -0.9238795042f, -0.3826834261f, ) \n"
"dcl_literal l12, 0xBF3504F3, 0xBF3504F3, 0xBF6C835E, 0xBEC3EF15 \n"
"; l13 = (0.9807852507f, -0.1950903237f, 0.9238795042f, -0.3826834261f, ) \n"
"dcl_literal l13, 0x3F7B14BE, 0xBE47C5C2, 0x3F6C835E, 0xBEC3EF15 \n"
"; l14 = (0.8314695954f, -0.5555702448f, 0.7071067691f, -0.7071067691f, ) \n"
"dcl_literal l14, 0x3F54DB31, 0xBF0E39DA, 0x3F3504F3, 0xBF3504F3 \n"
"; l15 = (0.5555702448f, -0.8314695954f, 0.3826834261f, -0.9238795042f, ) \n"
"dcl_literal l15, 0x3F0E39DA, 0xBF54DB31, 0x3EC3EF15, 0xBF6C835E \n"
"; l16 = (0.1950903237f, -0.9807852507f, 0.8314695954f, -0.5555702448f, ) \n"
"dcl_literal l16, 0x3E47C5C2, 0xBF7B14BE, 0x3F54DB31, 0xBF0E39DA \n"
"; l17 = (0.3826834261f, -0.9238795042f, -0.1950903237f, -0.9807852507f, ) \n"
"dcl_literal l17, 0x3EC3EF15, 0xBF6C835E, 0xBE47C5C2, 0xBF7B14BE \n"
"; l18 = (-0.7071067691f, -0.7071067691f, -0.9807852507f, -0.1950903237f, ) \n"
"dcl_literal l18, 0xBF3504F3, 0xBF3504F3, 0xBF7B14BE, 0xBE47C5C2 \n"
"; l19 = (-0.9238795042f, 0.3826834261f, -0.5555702448f, 0.8314695954f, ) \n"
"dcl_literal l19, 0xBF6C835E, 0x3EC3EF15, 0xBF0E39DA, 0x3F54DB31 \n"
"; l20 = (0.0f, 1.401298464e-45f, -1.#QNANf, 0.0f, ) \n"
"dcl_literal l20, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000000 \n"
"; l21 = (1.401298464e-45f, 2.802596929e-45f, -1.#QNANf, 1.401298464e-45f, ) \n"
"dcl_literal l21, 0x00000001, 0x00000002, 0xFFFFFFFE, 0x00000001 \n"
"; l22 = (2.802596929e-45f, 5.605193857e-45f, -1.#QNANf, 4.203895393e-45f, ) \n"
"dcl_literal l22, 0x00000002, 0x00000004, 0xFFFFFFFC, 0x00000003 \n"
"; l23 = (4.203895393e-45f, 1.121038771e-44f, -1.#QNANf, 9.809089250e-45f, ) \n"
"dcl_literal l23, 0x00000003, 0x00000008, 0xFFFFFFF8, 0x00000007 \n"
"; l24 = (5.605193857e-45f, 2.242077543e-44f, -1.#QNANf, 2.101947696e-44f, ) \n"
"dcl_literal l24, 0x00000004, 0x00000010, 0xFFFFFFF0, 0x0000000F \n"
"; l25 = (7.006492322e-45f, 4.484155086e-44f, -1.#QNANf, 4.344025239e-44f, ) \n"
"dcl_literal l25, 0x00000005, 0x00000020, 0xFFFFFFE0, 0x0000001F \n"
"; l29 = (1.261168618e-44f, 7.174648137e-43f, 0.0f, 0.0f, ) \n"
"dcl_literal l29, 0x00000009, 0x00000200, 0x00000000, 0x00000000 \n"
"; l30 = (1.541428311e-44f, 7.006492322e-45f, 7.174648137e-43f, 1.121038771e-44f, ) \n"
"dcl_literal l30, 0x0000000B, 0x00000005, 0x00000200, 0x00000008 \n"
"; l40 = (0.0f, -100.5309677f, -50.26548386f, -150.7964478f, ) \n"
"dcl_literal l40, 0x80000000, 0xC2C90FDB, 0xC2490FDB, 0xC316CBE4 \n"
"; l41 = (-25.13274193f, -125.6637039f, -75.39822388f, -175.929184f, ) \n"
"dcl_literal l41, 0xC1C90FDB, 0xC2FB53D1, 0xC296CBE4, 0xC32FEDDF \n"
"; l42 = (-12.56637096f, -113.0973358f, -62.83185196f, -163.3628235f, ) \n"
"dcl_literal l42, 0xC1490FDB, 0xC2E231D6, 0xC27B53D1, 0xC3235CE2 \n"
"; l43 = (-37.69911194f, -138.230072f, -87.96459198f, -188.4955597f, ) \n"
"dcl_literal l43, 0xC216CBE4, 0xC30A3AE6, 0xC2AFEDDF, 0xC33C7EDD \n"
"; l44 = (-6.283185482f, -106.8141479f, -56.54866791f, -157.0796356f, ) \n"
"dcl_literal l44, 0xC0C90FDB, 0xC2D5A0D8, 0xC26231D6, 0xC31D1463 \n"
"; l45 = (-31.41592598f, -131.9468842f, -81.68141174f, -182.2123718f, ) \n"
"dcl_literal l45, 0xC1FB53D1, 0xC303F267, 0xC2A35CE2, 0xC336365E \n"
"; l46 = (-18.84955597f, -119.3805237f, -69.11503601f, -169.6459961f, ) \n"
"dcl_literal l46, 0xC196CBE4, 0xC2EEC2D4, 0xC28A3AE6, 0xC329A560 \n"
"; l47 = (-43.98229599f, -144.5132599f, -94.24777985f, -194.7787476f, ) \n"
"dcl_literal l47, 0xC22FEDDF, 0xC3108365, 0xC2BC7EDD, 0xC342C75C \n"
/*
 * Kernel main body (runs up to "endmain").
 *   1. r90.x = group id << l30.x (11); r80.x = thread id + r90.x is the load
 *      base address for func 6.
 *   2. call 6 loads r400-r431, call 5 transforms them (func 5, "FFT32").
 *   3. r80 = float(thread id & 0x1F) / 1024.0, broadcast to all lanes, then
 *      call 10. NOTE(review): func 10 of this kernel is outside this
 *      excerpt; it presumably applies per-thread twiddle factors derived
 *      from r80 - confirm there.
 *   4. Four LDS exchange rounds, one per vector lane: call 8x writes that
 *      lane of r400-r431 to LDS and call 9x reads the exchanged values back
 *      into the same lane. r75.x / r75.y (thread id masked with l3.x / l3.y)
 *      are recomputed before every 9x call because the visible readers
 *      (funcs 91 and 92) increment r75.x in place.
 *   5. call 5 again, then r80.x = r90.x + (thread id << l30.y (5)) becomes
 *      the store base address and call 7 writes the 32 results.
 */
"ishl r90.x___, vThreadGrpIdFlat0.x, l30.x \n"
"iadd r80.x___, vTidInGrpFlat0.x, r90.x \n"
"call 6 \n"
"call 5 \n"
/* r80 is reused here: from now on it carries (tid & 31) / 1024. */
"and r0.x___, vTidInGrpFlat0.x, l25.w \n"
"itof r80.x___, r0.x \n"
"div_zeroop(fltmax) r80, r80.x, l5.x \n"
"call 10 \n"
/* Lane .x exchange. */
"call 81 \n"
"and r75.x___, vTidInGrpFlat0.x, l3.x \n"
"and r75._y__, vTidInGrpFlat0.x, l3.y \n"
"call 91 \n"
/* Lane .y exchange. */
"call 82 \n"
"and r75.x___, vTidInGrpFlat0.x, l3.x \n"
"and r75._y__, vTidInGrpFlat0.x, l3.y \n"
"call 92 \n"
/* Lane .z exchange. */
"call 83 \n"
"and r75.x___, vTidInGrpFlat0.x, l3.x \n"
"and r75._y__, vTidInGrpFlat0.x, l3.y \n"
"call 93 \n"
/* Lane .w exchange. */
"call 84 \n"
"and r75.x___, vTidInGrpFlat0.x, l3.x \n"
"and r75._y__, vTidInGrpFlat0.x, l3.y \n"
"call 94 \n"
"call 5 \n"
/* Store base: group offset plus 32 consecutive slots per thread. */
"ishl r90._y__, vTidInGrpFlat0.x, l30.y \n"
"iadd r80.x___, r90.x, r90.y \n"
"call 7 \n"
"endmain \n"
/*
 * IL func 0: packed complex multiply, r40 = r40 * r41.
 * Each register holds two complex numbers, (x, y) and (z, w), as
 * (real, imaginary) pairs:
 *   r40.x = r40.x*r41.x - r40.y*r41.y      r40.y = r40.x*r41.y + r40.y*r41.x
 * and the same for the (z, w) pair.
 * Input: r40, r41. Output: r40. Clobbers r100 and r101.
 */
"func 0 \n"
/* r100 = lane-wise products (re*re, im*im); r101 = cross products. */
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
/* Real parts: re*re - im*im. Imaginary parts: re*im + im*re. */
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
/*
 * IL func 2 (labelled FFT2 in the IL text): two-input butterfly.
 *   r50 <- r50 + r51
 *   r51 <- r50 - r51   (using the original r50, saved in r100)
 * Operates lane-wise on all four components. Clobbers r100.
 */
";FFT2 \n"
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
/*
 * IL func 3 (labelled FFT4 in the IL text): four-input transform on
 * r60-r63, in place, built from func 2 butterflies.
 *   Stage 1: butterflies on (r60, r62) and (r61, r63).
 *   Twiddle: r63 is multiplied through func 0 by l1.xzxz = (0, -1) per
 *            complex pair, i.e. by -i.
 *   Stage 2: butterflies on (r60, r61) and (r62, r63).
 * Uses r50/r51 and r40/r41 as call arguments (all clobbered).
 * NOTE(review): the resulting output ordering in r60-r63 is not stated in
 * the code; verify against the caller before relying on it.
 */
";FFT4 \n"
"func 3 \n"
/* Stage 1. */
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
/* r63 *= -i. */
"mov r40, r63 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r63, r40 \n"
/* Stage 2. */
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
/*
 * IL func 4 (labelled FFT8 in the IL text): eight-input transform on
 * r70-r77, in place.
 *   Stage 1: func 2 butterflies on (r70, r74), (r71, r75), (r72, r76) and
 *            (r73, r77).
 *   Twiddles on the second half, applied with func 0:
 *            r75 *= l1.yzyz = (1, -1) per pair, then scaled by l1.w (0.7071)
 *            r76 *= l1.xzxz = (0, -1) per pair, i.e. -i
 *            r77 *= l1.z, then scaled by l1.w
 *            NOTE(review): the single-component swizzle l1.z presumably
 *            broadcasts -1 to every lane, giving (-1, -1) per pair - confirm
 *            against the IL swizzle rules.
 *   Stage 2: func 3 on r70-r73 and on r74-r77 (passed through r60-r63).
 * Clobbers r40, r41, r50, r51, r60-r63 and whatever funcs 0, 2 and 3 clobber.
 */
";FFT8 \n"
"func 4 \n"
/* Stage 1: butterflies pairing register k with register k + 4. */
"mov r50, r70 \n"
"mov r51, r74 \n"
"call 2 \n"
"mov r70, r50 \n"
"mov r74, r51 \n"
"mov r50, r71 \n"
"mov r51, r75 \n"
"call 2 \n"
"mov r71, r50 \n"
"mov r75, r51 \n"
"mov r50, r72 \n"
"mov r51, r76 \n"
"call 2 \n"
"mov r72, r50 \n"
"mov r76, r51 \n"
"mov r50, r73 \n"
"mov r51, r77 \n"
"call 2 \n"
"mov r73, r50 \n"
"mov r77, r51 \n"
/* Twiddle multiplications on r75, r76 and r77. */
"mov r40, r75 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r75, r40 \n"
"mul_ieee r75, r75, l1.w \n"
"mov r40, r76 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r76, r40 \n"
"mov r40, r77 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r77, r40 \n"
"mul_ieee r77, r77, l1.w \n"
/* Stage 2: func 3 on each half. */
"mov r60, r70 \n"
"mov r61, r71 \n"
"mov r62, r72 \n"
"mov r63, r73 \n"
"call 3 \n"
"mov r70, r60 \n"
"mov r71, r61 \n"
"mov r72, r62 \n"
"mov r73, r63 \n"
"mov r60, r74 \n"
"mov r61, r75 \n"
"mov r62, r76 \n"
"mov r63, r77 \n"
"call 3 \n"
"mov r74, r60 \n"
"mov r75, r61 \n"
"mov r76, r62 \n"
"mov r77, r63 \n"
"ret \n"
"endfunc \n"
/*
 * IL func 5 (labelled FFT32 in the IL text): 32-input transform on
 * r400-r431, in place.
 *   Stage 1: eight func 3 calls, call k (k = 0..7) operating on the four
 *            registers r(400+k), r(408+k), r(416+k), r(424+k).
 *   Twiddles: r409-r415, r417-r423 and r425-r431 are each multiplied through
 *            func 0 by a constant pair taken from l10-l19 (and l1.xzxz for
 *            r412). r400-r408, r416 and r424 are not multiplied.
 *   Stage 2: four func 4 calls, one per block of eight consecutive
 *            registers (r400-r407, r408-r415, r416-r423, r424-r431), passed
 *            through r70-r77.
 * Clobbers r40, r41, r60-r63, r70-r77 and whatever funcs 0, 3 and 4 clobber.
 * NOTE(review): the ordering of the results within r400-r431 is not stated
 * here; func 7 of this kernel stores them in a bit-reversed register order.
 */
";FFT32 \n"
"func 5 \n"
/* Stage 1: func 3 across registers spaced 8 apart. */
"mov r60, r400 \n"
"mov r61, r408 \n"
"mov r62, r416 \n"
"mov r63, r424 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r408, r61 \n"
"mov r416, r62 \n"
"mov r424, r63 \n"
"mov r60, r401 \n"
"mov r61, r409 \n"
"mov r62, r417 \n"
"mov r63, r425 \n"
"call 3 \n"
"mov r401, r60 \n"
"mov r409, r61 \n"
"mov r417, r62 \n"
"mov r425, r63 \n"
"mov r60, r402 \n"
"mov r61, r410 \n"
"mov r62, r418 \n"
"mov r63, r426 \n"
"call 3 \n"
"mov r402, r60 \n"
"mov r410, r61 \n"
"mov r418, r62 \n"
"mov r426, r63 \n"
"mov r60, r403 \n"
"mov r61, r411 \n"
"mov r62, r419 \n"
"mov r63, r427 \n"
"call 3 \n"
"mov r403, r60 \n"
"mov r411, r61 \n"
"mov r419, r62 \n"
"mov r427, r63 \n"
"mov r60, r404 \n"
"mov r61, r412 \n"
"mov r62, r420 \n"
"mov r63, r428 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r412, r61 \n"
"mov r420, r62 \n"
"mov r428, r63 \n"
"mov r60, r405 \n"
"mov r61, r413 \n"
"mov r62, r421 \n"
"mov r63, r429 \n"
"call 3 \n"
"mov r405, r60 \n"
"mov r413, r61 \n"
"mov r421, r62 \n"
"mov r429, r63 \n"
"mov r60, r406 \n"
"mov r61, r414 \n"
"mov r62, r422 \n"
"mov r63, r430 \n"
"call 3 \n"
"mov r406, r60 \n"
"mov r414, r61 \n"
"mov r422, r62 \n"
"mov r430, r63 \n"
"mov r60, r407 \n"
"mov r61, r415 \n"
"mov r62, r423 \n"
"mov r63, r431 \n"
"call 3 \n"
"mov r407, r60 \n"
"mov r415, r61 \n"
"mov r423, r62 \n"
"mov r431, r63 \n"
/* Twiddles for r409-r415. */
"mov r40, r409 \n"
"mov r41, l10.xyxy \n"
"call 0 \n"
"mov r409, r40 \n"
"mov r40, r410 \n"
"mov r41, l10.zwzw \n"
"call 0 \n"
"mov r410, r40 \n"
"mov r40, r411 \n"
"mov r41, l11.xyxy \n"
"call 0 \n"
"mov r411, r40 \n"
"mov r40, r412 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r412, r40 \n"
"mov r40, r413 \n"
"mov r41, l11.zwzw \n"
"call 0 \n"
"mov r413, r40 \n"
"mov r40, r414 \n"
"mov r41, l12.xyxy \n"
"call 0 \n"
"mov r414, r40 \n"
"mov r40, r415 \n"
"mov r41, l12.zwzw \n"
"call 0 \n"
"mov r415, r40 \n"
/* Twiddles for r417-r423. */
"mov r40, r417 \n"
"mov r41, l13.xyxy \n"
"call 0 \n"
"mov r417, r40 \n"
"mov r40, r418 \n"
"mov r41, l13.zwzw \n"
"call 0 \n"
"mov r418, r40 \n"
"mov r40, r419 \n"
"mov r41, l14.xyxy \n"
"call 0 \n"
"mov r419, r40 \n"
"mov r40, r420 \n"
"mov r41, l14.zwzw \n"
"call 0 \n"
"mov r420, r40 \n"
"mov r40, r421 \n"
"mov r41, l15.xyxy \n"
"call 0 \n"
"mov r421, r40 \n"
"mov r40, r422 \n"
"mov r41, l15.zwzw \n"
"call 0 \n"
"mov r422, r40 \n"
"mov r40, r423 \n"
"mov r41, l16.xyxy \n"
"call 0 \n"
"mov r423, r40 \n"
/* Twiddles for r425-r431. */
"mov r40, r425 \n"
"mov r41, l16.zwzw \n"
"call 0 \n"
"mov r425, r40 \n"
"mov r40, r426 \n"
"mov r41, l17.xyxy \n"
"call 0 \n"
"mov r426, r40 \n"
"mov r40, r427 \n"
"mov r41, l17.zwzw \n"
"call 0 \n"
"mov r427, r40 \n"
"mov r40, r428 \n"
"mov r41, l18.xyxy \n"
"call 0 \n"
"mov r428, r40 \n"
"mov r40, r429 \n"
"mov r41, l18.zwzw \n"
"call 0 \n"
"mov r429, r40 \n"
"mov r40, r430 \n"
"mov r41, l19.xyxy \n"
"call 0 \n"
"mov r430, r40 \n"
"mov r40, r431 \n"
"mov r41, l19.zwzw \n"
"call 0 \n"
"mov r431, r40 \n"
/* Stage 2: func 4 on each block of eight consecutive registers. */
"mov r70, r400 \n"
"mov r71, r401 \n"
"mov r72, r402 \n"
"mov r73, r403 \n"
"mov r74, r404 \n"
"mov r75, r405 \n"
"mov r76, r406 \n"
"mov r77, r407 \n"
"call 4 \n"
"mov r400, r70 \n"
"mov r401, r71 \n"
"mov r402, r72 \n"
"mov r403, r73 \n"
"mov r404, r74 \n"
"mov r405, r75 \n"
"mov r406, r76 \n"
"mov r407, r77 \n"
"mov r70, r408 \n"
"mov r71, r409 \n"
"mov r72, r410 \n"
"mov r73, r411 \n"
"mov r74, r412 \n"
"mov r75, r413 \n"
"mov r76, r414 \n"
"mov r77, r415 \n"
"call 4 \n"
"mov r408, r70 \n"
"mov r409, r71 \n"
"mov r410, r72 \n"
"mov r411, r73 \n"
"mov r412, r74 \n"
"mov r413, r75 \n"
"mov r414, r76 \n"
"mov r415, r77 \n"
"mov r70, r416 \n"
"mov r71, r417 \n"
"mov r72, r418 \n"
"mov r73, r419 \n"
"mov r74, r420 \n"
"mov r75, r421 \n"
"mov r76, r422 \n"
"mov r77, r423 \n"
"call 4 \n"
"mov r416, r70 \n"
"mov r417, r71 \n"
"mov r418, r72 \n"
"mov r419, r73 \n"
"mov r420, r74 \n"
"mov r421, r75 \n"
"mov r422, r76 \n"
"mov r423, r77 \n"
"mov r70, r424 \n"
"mov r71, r425 \n"
"mov r72, r426 \n"
"mov r73, r427 \n"
"mov r74, r428 \n"
"mov r75, r429 \n"
"mov r76, r430 \n"
"mov r77, r431 \n"
"call 4 \n"
"mov r424, r70 \n"
"mov r425, r71 \n"
"mov r426, r72 \n"
"mov r427, r73 \n"
"mov r428, r74 \n"
"mov r429, r75 \n"
"mov r430, r76 \n"
"mov r431, r77 \n"
"ret \n"
"endfunc \n"
/*
 * IL func 6: loads r400-r431 from the g[] buffer.
 * Register r(400 + j) is read from g[r80.x + 64 * j] for j = 0..31. The
 * running address is kept in r80.y: eight loads at offsets 0, 64, ..., 448,
 * then r80.y is advanced by l30.z (0x200 = 512) before the next eight.
 * Input: r80.x (base index). Clobbers r80.y; r80.x is left unchanged.
 */
"func 6 \n"
"mov r80._y__, r80.x \n"
"mov r400, g[r80.y+0] \n"
"mov r401, g[r80.y+64] \n"
"mov r402, g[r80.y+128] \n"
"mov r403, g[r80.y+192] \n"
"mov r404, g[r80.y+256] \n"
"mov r405, g[r80.y+320] \n"
"mov r406, g[r80.y+384] \n"
"mov r407, g[r80.y+448] \n"
/* Advance the base by 512 for the next group of eight. */
"iadd r80._y__, r80.y, l30.z \n"
"mov r408, g[r80.y+0] \n"
"mov r409, g[r80.y+64] \n"
"mov r410, g[r80.y+128] \n"
"mov r411, g[r80.y+192] \n"
"mov r412, g[r80.y+256] \n"
"mov r413, g[r80.y+320] \n"
"mov r414, g[r80.y+384] \n"
"mov r415, g[r80.y+448] \n"
"iadd r80._y__, r80.y, l30.z \n"
"mov r416, g[r80.y+0] \n"
"mov r417, g[r80.y+64] \n"
"mov r418, g[r80.y+128] \n"
"mov r419, g[r80.y+192] \n"
"mov r420, g[r80.y+256] \n"
"mov r421, g[r80.y+320] \n"
"mov r422, g[r80.y+384] \n"
"mov r423, g[r80.y+448] \n"
"iadd r80._y__, r80.y, l30.z \n"
"mov r424, g[r80.y+0] \n"
"mov r425, g[r80.y+64] \n"
"mov r426, g[r80.y+128] \n"
"mov r427, g[r80.y+192] \n"
"mov r428, g[r80.y+256] \n"
"mov r429, g[r80.y+320] \n"
"mov r430, g[r80.y+384] \n"
"mov r431, g[r80.y+448] \n"
"ret \n"
"endfunc \n"
/*
 * IL func 7: stores r400-r431 to 32 consecutive g[] slots starting at r80.x.
 * Slot i (i = 0..31) receives register r(400 + bitrev5(i)), where bitrev5
 * reverses the five bits of i; for example slot 1 <- r416, slot 2 <- r408,
 * slot 16 <- r401.
 * Input: r80.x (base index). No registers are modified.
 */
"func 7 \n"
"mov g[r80.x+0], r400 \n"
"mov g[r80.x+1], r416 \n"
"mov g[r80.x+2], r408 \n"
"mov g[r80.x+3], r424 \n"
"mov g[r80.x+4], r404 \n"
"mov g[r80.x+5], r420 \n"
"mov g[r80.x+6], r412 \n"
"mov g[r80.x+7], r428 \n"
"mov g[r80.x+8], r402 \n"
"mov g[r80.x+9], r418 \n"
"mov g[r80.x+10], r410 \n"
"mov g[r80.x+11], r426 \n"
"mov g[r80.x+12], r406 \n"
"mov g[r80.x+13], r422 \n"
"mov g[r80.x+14], r414 \n"
"mov g[r80.x+15], r430 \n"
"mov g[r80.x+16], r401 \n"
"mov g[r80.x+17], r417 \n"
"mov g[r80.x+18], r409 \n"
"mov g[r80.x+19], r425 \n"
"mov g[r80.x+20], r405 \n"
"mov g[r80.x+21], r421 \n"
"mov g[r80.x+22], r413 \n"
"mov g[r80.x+23], r429 \n"
"mov g[r80.x+24], r403 \n"
"mov g[r80.x+25], r419 \n"
"mov g[r80.x+26], r411 \n"
"mov g[r80.x+27], r427 \n"
"mov g[r80.x+28], r407 \n"
"mov g[r80.x+29], r423 \n"
"mov g[r80.x+30], r415 \n"
"mov g[r80.x+31], r431 \n"
"ret \n"
"endfunc \n"
/*
 * IL func 81: gathers the .x component of r400-r431 into r500-r507 and
 * writes them to LDS.
 * The packed element at position i (r500.x is 0, r500.y is 1, ..., r507.w is
 * 31) comes from register r(400 + bitrev5(i)), the same 5-bit bit-reversed
 * order that func 7 of this kernel uses for its stores. The eight vectors
 * go to mem0 at local offsets 0, 4, ..., 28, followed by fence_lds.
 * Clobbers r500-r507.
 */
"func 81 \n"
"mov r500.x___, r400.x \n"
"mov r500._y__, r416.x \n"
"mov r500.__z_, r408.x \n"
"mov r500.___w, r424.x \n"
"mov r501.x___, r404.x \n"
"mov r501._y__, r420.x \n"
"mov r501.__z_, r412.x \n"
"mov r501.___w, r428.x \n"
"mov r502.x___, r402.x \n"
"mov r502._y__, r418.x \n"
"mov r502.__z_, r410.x \n"
"mov r502.___w, r426.x \n"
"mov r503.x___, r406.x \n"
"mov r503._y__, r422.x \n"
"mov r503.__z_, r414.x \n"
"mov r503.___w, r430.x \n"
"mov r504.x___, r401.x \n"
"mov r504._y__, r417.x \n"
"mov r504.__z_, r409.x \n"
"mov r504.___w, r425.x \n"
"mov r505.x___, r405.x \n"
"mov r505._y__, r421.x \n"
"mov r505.__z_, r413.x \n"
"mov r505.___w, r429.x \n"
"mov r506.x___, r403.x \n"
"mov r506._y__, r419.x \n"
"mov r506.__z_, r411.x \n"
"mov r506.___w, r427.x \n"
"mov r507.x___, r407.x \n"
"mov r507._y__, r423.x \n"
"mov r507.__z_, r415.x \n"
"mov r507.___w, r431.x \n"
/* Store the eight packed vectors to LDS, then fence. */
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"lds_write_vec_lOffset(16) mem0, r504 \n"
"lds_write_vec_lOffset(20) mem0, r505 \n"
"lds_write_vec_lOffset(24) mem0, r506 \n"
"lds_write_vec_lOffset(28) mem0, r507 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
/*
 * IL func 91: reads eight vectors r500-r507 from LDS with
 * lds_read_vec_neighborExch and scatters them, in sequential order, into the
 * .x component of r400-r431 (r500.xyzw -> r400.x..r403.x, and so on).
 * The address operand is r75.xyyy; r75.x is advanced by l24.x (4) between
 * consecutive reads.
 * Side effect: r75.x is modified in place (seven increments), so the caller
 * must set r75 up again before the next exchange - the main body does this
 * before every 9x call.
 */
"func 91 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r502, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r503, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r504, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r505, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r506, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r507, r75.xyyy \n"
/* Unpack into the .x lane of r400-r431. */
"mov r400.x___, r500.x \n"
"mov r401.x___, r500.y \n"
"mov r402.x___, r500.z \n"
"mov r403.x___, r500.w \n"
"mov r404.x___, r501.x \n"
"mov r405.x___, r501.y \n"
"mov r406.x___, r501.z \n"
"mov r407.x___, r501.w \n"
"mov r408.x___, r502.x \n"
"mov r409.x___, r502.y \n"
"mov r410.x___, r502.z \n"
"mov r411.x___, r502.w \n"
"mov r412.x___, r503.x \n"
"mov r413.x___, r503.y \n"
"mov r414.x___, r503.z \n"
"mov r415.x___, r503.w \n"
"mov r416.x___, r504.x \n"
"mov r417.x___, r504.y \n"
"mov r418.x___, r504.z \n"
"mov r419.x___, r504.w \n"
"mov r420.x___, r505.x \n"
"mov r421.x___, r505.y \n"
"mov r422.x___, r505.z \n"
"mov r423.x___, r505.w \n"
"mov r424.x___, r506.x \n"
"mov r425.x___, r506.y \n"
"mov r426.x___, r506.z \n"
"mov r427.x___, r506.w \n"
"mov r428.x___, r507.x \n"
"mov r429.x___, r507.y \n"
"mov r430.x___, r507.z \n"
"mov r431.x___, r507.w \n"
"ret \n"
"endfunc \n"
/*
 * IL func 82: gathers the .y component of r400-r431 into r500-r507 and
 * writes them to LDS.
 * The packed element at position i (r500.x is 0, ..., r507.w is 31) comes
 * from register r(400 + bitrev5(i)), the same 5-bit bit-reversed order as
 * func 81 of this kernel, which handles .x. The eight vectors go to mem0 at
 * local offsets 0, 4, ..., 28, followed by fence_lds.
 * Clobbers r500-r507.
 */
"func 82 \n"
"mov r500.x___, r400.y \n"
"mov r500._y__, r416.y \n"
"mov r500.__z_, r408.y \n"
"mov r500.___w, r424.y \n"
"mov r501.x___, r404.y \n"
"mov r501._y__, r420.y \n"
"mov r501.__z_, r412.y \n"
"mov r501.___w, r428.y \n"
"mov r502.x___, r402.y \n"
"mov r502._y__, r418.y \n"
"mov r502.__z_, r410.y \n"
"mov r502.___w, r426.y \n"
"mov r503.x___, r406.y \n"
"mov r503._y__, r422.y \n"
"mov r503.__z_, r414.y \n"
"mov r503.___w, r430.y \n"
"mov r504.x___, r401.y \n"
"mov r504._y__, r417.y \n"
"mov r504.__z_, r409.y \n"
"mov r504.___w, r425.y \n"
"mov r505.x___, r405.y \n"
"mov r505._y__, r421.y \n"
"mov r505.__z_, r413.y \n"
"mov r505.___w, r429.y \n"
"mov r506.x___, r403.y \n"
"mov r506._y__, r419.y \n"
"mov r506.__z_, r411.y \n"
"mov r506.___w, r427.y \n"
"mov r507.x___, r407.y \n"
"mov r507._y__, r423.y \n"
"mov r507.__z_, r415.y \n"
"mov r507.___w, r431.y \n"
/* Store the eight packed vectors to LDS, then fence. */
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"lds_write_vec_lOffset(16) mem0, r504 \n"
"lds_write_vec_lOffset(20) mem0, r505 \n"
"lds_write_vec_lOffset(24) mem0, r506 \n"
"lds_write_vec_lOffset(28) mem0, r507 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
"func 92 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r502, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r503, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r504, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r505, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r506, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r507, r75.xyyy \n"
"mov r400._y__, r500.x \n"
"mov r401._y__, r500.y \n"
"mov r402._y__, r500.z \n"
"mov r403._y__, r500.w \n"
"mov r404._y__, r501.x \n"
"mov r405._y__, r501.y \n"
"mov r406._y__, r501.z \n"
"mov r407._y__, r501.w \n"
"mov r408._y__, r502.x \n"
"mov r409._y__, r502.y \n"
"mov r410._y__, r502.z \n"
"mov r411._y__, r502.w \n"
"mov r412._y__, r503.x \n"
"mov r413._y__, r503.y \n"
"mov r414._y__, r503.z \n"
"mov r415._y__, r503.w \n"
"mov r416._y__, r504.x \n"
"mov r417._y__, r504.y \n"
"mov r418._y__, r504.z \n"
"mov r419._y__, r504.w \n"
"mov r420._y__, r505.x \n"
"mov r421._y__, r505.y \n"
"mov r422._y__, r505.z \n"
"mov r423._y__, r505.w \n"
"mov r424._y__, r506.x \n"
"mov r425._y__, r506.y \n"
"mov r426._y__, r506.z \n"
"mov r427._y__, r506.w \n"
"mov r428._y__, r507.x \n"
"mov r429._y__, r507.y \n"
"mov r430._y__, r507.z \n"
"mov r431._y__, r507.w \n"
"ret \n"
"endfunc \n"
"func 83 \n"
"mov r500.x___, r400.z \n"
"mov r500._y__, r416.z \n"
"mov r500.__z_, r408.z \n"
"mov r500.___w, r424.z \n"
"mov r501.x___, r404.z \n"
"mov r501._y__, r420.z \n"
"mov r501.__z_, r412.z \n"
"mov r501.___w, r428.z \n"
"mov r502.x___, r402.z \n"
"mov r502._y__, r418.z \n"
"mov r502.__z_, r410.z \n"
"mov r502.___w, r426.z \n"
"mov r503.x___, r406.z \n"
"mov r503._y__, r422.z \n"
"mov r503.__z_, r414.z \n"
"mov r503.___w, r430.z \n"
"mov r504.x___, r401.z \n"
"mov r504._y__, r417.z \n"
"mov r504.__z_, r409.z \n"
"mov r504.___w, r425.z \n"
"mov r505.x___, r405.z \n"
"mov r505._y__, r421.z \n"
"mov r505.__z_, r413.z \n"
"mov r505.___w, r429.z \n"
"mov r506.x___, r403.z \n"
"mov r506._y__, r419.z \n"
"mov r506.__z_, r411.z \n"
"mov r506.___w, r427.z \n"
"mov r507.x___, r407.z \n"
"mov r507._y__, r423.z \n"
"mov r507.__z_, r415.z \n"
"mov r507.___w, r431.z \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"lds_write_vec_lOffset(16) mem0, r504 \n"
"lds_write_vec_lOffset(20) mem0, r505 \n"
"lds_write_vec_lOffset(24) mem0, r506 \n"
"lds_write_vec_lOffset(28) mem0, r507 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
"func 93 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r502, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r503, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r504, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r505, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r506, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r507, r75.xyyy \n"
"mov r400.__z_, r500.x \n"
"mov r401.__z_, r500.y \n"
"mov r402.__z_, r500.z \n"
"mov r403.__z_, r500.w \n"
"mov r404.__z_, r501.x \n"
"mov r405.__z_, r501.y \n"
"mov r406.__z_, r501.z \n"
"mov r407.__z_, r501.w \n"
"mov r408.__z_, r502.x \n"
"mov r409.__z_, r502.y \n"
"mov r410.__z_, r502.z \n"
"mov r411.__z_, r502.w \n"
"mov r412.__z_, r503.x \n"
"mov r413.__z_, r503.y \n"
"mov r414.__z_, r503.z \n"
"mov r415.__z_, r503.w \n"
"mov r416.__z_, r504.x \n"
"mov r417.__z_, r504.y \n"
"mov r418.__z_, r504.z \n"
"mov r419.__z_, r504.w \n"
"mov r420.__z_, r505.x \n"
"mov r421.__z_, r505.y \n"
"mov r422.__z_, r505.z \n"
"mov r423.__z_, r505.w \n"
"mov r424.__z_, r506.x \n"
"mov r425.__z_, r506.y \n"
"mov r426.__z_, r506.z \n"
"mov r427.__z_, r506.w \n"
"mov r428.__z_, r507.x \n"
"mov r429.__z_, r507.y \n"
"mov r430.__z_, r507.z \n"
"mov r431.__z_, r507.w \n"
"ret \n"
"endfunc \n"
"func 84 \n"
"mov r500.x___, r400.w \n"
"mov r500._y__, r416.w \n"
"mov r500.__z_, r408.w \n"
"mov r500.___w, r424.w \n"
"mov r501.x___, r404.w \n"
"mov r501._y__, r420.w \n"
"mov r501.__z_, r412.w \n"
"mov r501.___w, r428.w \n"
"mov r502.x___, r402.w \n"
"mov r502._y__, r418.w \n"
"mov r502.__z_, r410.w \n"
"mov r502.___w, r426.w \n"
"mov r503.x___, r406.w \n"
"mov r503._y__, r422.w \n"
"mov r503.__z_, r414.w \n"
"mov r503.___w, r430.w \n"
"mov r504.x___, r401.w \n"
"mov r504._y__, r417.w \n"
"mov r504.__z_, r409.w \n"
"mov r504.___w, r425.w \n"
"mov r505.x___, r405.w \n"
"mov r505._y__, r421.w \n"
"mov r505.__z_, r413.w \n"
"mov r505.___w, r429.w \n"
"mov r506.x___, r403.w \n"
"mov r506._y__, r419.w \n"
"mov r506.__z_, r411.w \n"
"mov r506.___w, r427.w \n"
"mov r507.x___, r407.w \n"
"mov r507._y__, r423.w \n"
"mov r507.__z_, r415.w \n"
"mov r507.___w, r431.w \n"
"lds_write_vec mem0, r500 \n"
"lds_write_vec_lOffset(4) mem0, r501 \n"
"lds_write_vec_lOffset(8) mem0, r502 \n"
"lds_write_vec_lOffset(12) mem0, r503 \n"
"lds_write_vec_lOffset(16) mem0, r504 \n"
"lds_write_vec_lOffset(20) mem0, r505 \n"
"lds_write_vec_lOffset(24) mem0, r506 \n"
"lds_write_vec_lOffset(28) mem0, r507 \n"
"fence_lds \n"
"ret \n"
"endfunc \n"
"func 94 \n"
"lds_read_vec_neighborExch r500, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r501, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r502, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r503, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r504, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r505, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r506, r75.xyyy \n"
"iadd r75.x___, r75.x, l24.x \n"
"lds_read_vec_neighborExch r507, r75.xyyy \n"
"mov r400.___w, r500.x \n"
"mov r401.___w, r500.y \n"
"mov r402.___w, r500.z \n"
"mov r403.___w, r500.w \n"
"mov r404.___w, r501.x \n"
"mov r405.___w, r501.y \n"
"mov r406.___w, r501.z \n"
"mov r407.___w, r501.w \n"
"mov r408.___w, r502.x \n"
"mov r409.___w, r502.y \n"
"mov r410.___w, r502.z \n"
"mov r411.___w, r502.w \n"
"mov r412.___w, r503.x \n"
"mov r413.___w, r503.y \n"
"mov r414.___w, r503.z \n"
"mov r415.___w, r503.w \n"
"mov r416.___w, r504.x \n"
"mov r417.___w, r504.y \n"
"mov r418.___w, r504.z \n"
"mov r419.___w, r504.w \n"
"mov r420.___w, r505.x \n"
"mov r421.___w, r505.y \n"
"mov r422.___w, r505.z \n"
"mov r423.___w, r505.w \n"
"mov r424.___w, r506.x \n"
"mov r425.___w, r506.y \n"
"mov r426.___w, r506.z \n"
"mov r427.___w, r506.w \n"
"mov r428.___w, r507.x \n"
"mov r429.___w, r507.y \n"
"mov r430.___w, r507.z \n"
"mov r431.___w, r507.w \n"
"ret \n"
"endfunc \n"
"func 10 \n"
"mul_ieee r100, r80.x, l40 \n"
"mul_ieee r101, r80.x, l41 \n"
"mul_ieee r102, r80.x, l42 \n"
"mul_ieee r103, r80.x, l43 \n"
"mul_ieee r104, r80.x, l44 \n"
"mul_ieee r105, r80.x, l45 \n"
"mul_ieee r106, r80.x, l46 \n"
"mul_ieee r107, r80.x, l47 \n"
"cos_vec r110._yzw, r100 \n"
"cos_vec r111, r101 \n"
"cos_vec r112, r102 \n"
"cos_vec r113, r103 \n"
"cos_vec r114, r104 \n"
"cos_vec r115, r105 \n"
"cos_vec r116, r106 \n"
"cos_vec r117, r107 \n"
"sin_vec r120._yzw, r100 \n"
"sin_vec r121, r101 \n"
"sin_vec r122, r102 \n"
"sin_vec r123, r103 \n"
"sin_vec r124, r104 \n"
"sin_vec r125, r105 \n"
"sin_vec r126, r106 \n"
"sin_vec r127, r107 \n"
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
"mov r40, r404 \n"
"mov r41.x_z_, r111.x \n"
"mov r41._y_w, r121.x \n"
"call 0 \n"
"mov r404, r40 \n"
"mov r40, r405 \n"
"mov r41.x_z_, r111.y \n"
"mov r41._y_w, r121.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mov r40, r406 \n"
"mov r41.x_z_, r111.z \n"
"mov r41._y_w, r121.z \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41.x_z_, r111.w \n"
"mov r41._y_w, r121.w \n"
"call 0 \n"
"mov r407, r40 \n"
"mov r40, r408 \n"
"mov r41.x_z_, r112.x \n"
"mov r41._y_w, r122.x \n"
"call 0 \n"
"mov r408, r40 \n"
"mov r40, r409 \n"
"mov r41.x_z_, r112.y \n"
"mov r41._y_w, r122.y \n"
"call 0 \n"
"mov r409, r40 \n"
"mov r40, r410 \n"
"mov r41.x_z_, r112.z \n"
"mov r41._y_w, r122.z \n"
"call 0 \n"
"mov r410, r40 \n"
"mov r40, r411 \n"
"mov r41.x_z_, r112.w \n"
"mov r41._y_w, r122.w \n"
"call 0 \n"
"mov r411, r40 \n"
"mov r40, r412 \n"
"mov r41.x_z_, r113.x \n"
"mov r41._y_w, r123.x \n"
"call 0 \n"
"mov r412, r40 \n"
"mov r40, r413 \n"
"mov r41.x_z_, r113.y \n"
"mov r41._y_w, r123.y \n"
"call 0 \n"
"mov r413, r40 \n"
"mov r40, r414 \n"
"mov r41.x_z_, r113.z \n"
"mov r41._y_w, r123.z \n"
"call 0 \n"
"mov r414, r40 \n"
"mov r40, r415 \n"
"mov r41.x_z_, r113.w \n"
"mov r41._y_w, r123.w \n"
"call 0 \n"
"mov r415, r40 \n"
"mov r40, r416 \n"
"mov r41.x_z_, r114.x \n"
"mov r41._y_w, r124.x \n"
"call 0 \n"
"mov r416, r40 \n"
"mov r40, r417 \n"
"mov r41.x_z_, r114.y \n"
"mov r41._y_w, r124.y \n"
"call 0 \n"
"mov r417, r40 \n"
"mov r40, r418 \n"
"mov r41.x_z_, r114.z \n"
"mov r41._y_w, r124.z \n"
"call 0 \n"
"mov r418, r40 \n"
"mov r40, r419 \n"
"mov r41.x_z_, r114.w \n"
"mov r41._y_w, r124.w \n"
"call 0 \n"
"mov r419, r40 \n"
"mov r40, r420 \n"
"mov r41.x_z_, r115.x \n"
"mov r41._y_w, r125.x \n"
"call 0 \n"
"mov r420, r40 \n"
"mov r40, r421 \n"
"mov r41.x_z_, r115.y \n"
"mov r41._y_w, r125.y \n"
"call 0 \n"
"mov r421, r40 \n"
"mov r40, r422 \n"
"mov r41.x_z_, r115.z \n"
"mov r41._y_w, r125.z \n"
"call 0 \n"
"mov r422, r40 \n"
"mov r40, r423 \n"
"mov r41.x_z_, r115.w \n"
"mov r41._y_w, r125.w \n"
"call 0 \n"
"mov r423, r40 \n"
"mov r40, r424 \n"
"mov r41.x_z_, r116.x \n"
"mov r41._y_w, r126.x \n"
"call 0 \n"
"mov r424, r40 \n"
"mov r40, r425 \n"
"mov r41.x_z_, r116.y \n"
"mov r41._y_w, r126.y \n"
"call 0 \n"
"mov r425, r40 \n"
"mov r40, r426 \n"
"mov r41.x_z_, r116.z \n"
"mov r41._y_w, r126.z \n"
"call 0 \n"
"mov r426, r40 \n"
"mov r40, r427 \n"
"mov r41.x_z_, r116.w \n"
"mov r41._y_w, r126.w \n"
"call 0 \n"
"mov r427, r40 \n"
"mov r40, r428 \n"
"mov r41.x_z_, r117.x \n"
"mov r41._y_w, r127.x \n"
"call 0 \n"
"mov r428, r40 \n"
"mov r40, r429 \n"
"mov r41.x_z_, r117.y \n"
"mov r41._y_w, r127.y \n"
"call 0 \n"
"mov r429, r40 \n"
"mov r40, r430 \n"
"mov r41.x_z_, r117.z \n"
"mov r41._y_w, r127.z \n"
"call 0 \n"
"mov r430, r40 \n"
"mov r40, r431 \n"
"mov r41.x_z_, r117.w \n"
"mov r41._y_w, r127.w \n"
"call 0 \n"
"mov r431, r40 \n"
"ret \n"
"endfunc \n"
"end \n";

// AMD IL compute-kernel text (il_cs_2_0, 64 threads per group).
// Main loads r400..r407 from the global buffer g[] (func 5), runs the routine
// labelled FFT8 (func 4), multiplies r401..r407 by per-thread twiddle factors
// (func 7) and writes the eight registers back to g[] (func 6).
// NOTE(review): presumably one pass of a 2048-point FFT (l3.x is 2048.0f);
// confirm against the host code that dispatches this kernel.
static const char* _fft2048_fft8_tomo_source_ = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.802596929e-45f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000002 \n"
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l2 = (4.203895393e-45f, -1.#QNANf, 8.407790786e-45f, 1.261168618e-44f, ) \n"
"dcl_literal l2, 0x00000003, 0xFFFFFFFC, 0x00000006, 0x00000009 \n"
"; l3 = (2048.0f, 0.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l3, 0x45000000, 0x00000000, 0x00000000, 0x00000000 \n"
"; l4 = (2.101947696e-44f, -1.#QNANf, 0.0f, 0.0f, ) \n"
"dcl_literal l4, 0x0000000F, 0xFFFFFFF0, 0x00000000, 0x00000000 \n"
"; l10 = (0.0f, -25.13274193f, -12.56637096f, -37.69911194f, ) \n"
"dcl_literal l10, 0x80000000, 0xC1C90FDB, 0xC1490FDB, 0xC216CBE4 \n"
"; l11 = (-6.283185482f, -31.41592598f, -18.84955597f, -43.98229599f, ) \n"
"dcl_literal l11, 0xC0C90FDB, 0xC1FB53D1, 0xC196CBE4, 0xC22FEDDF \n"
// Load-index setup, with grp = vThreadGrpIdFlat0.x and tid = vTidInGrpFlat0.x:
//   r70.x = (grp & 3) << 6
//   r70.y = (grp & 0xFFFFFFFC) << 9
//   r70.z = r70.x + tid
//   r70.x = r70.y + r70.z   (base index used by the loads in func 5)
"and r70.x___, vThreadGrpIdFlat0.x, l2.x \n"
"and r70._y__, vThreadGrpIdFlat0.x, l2.y \n"
"ishl r70.x___, r70.x, l2.z \n"
"ishl r70._y__, r70.y, l2.w \n"
"iadd r70.__z_, r70.x, vTidInGrpFlat0.x \n"
"iadd r70.x___, r70.y, r70.z \n"
"call 5 \n"
"call 4 \n"
// r80 = float(r70.z) / l3.x (2048.0f); func 7 scales its twiddle angles by it.
"itof r80.x___, r70.z \n"
"div_zeroop(fltmax) r80, r80.x, l3.x \n"
"call 7 \n"
// Store index: r70.x = (r70.z & 0xF) + ((r70.z & 0xFFFFFFF0) << 2) + r70.y
"and r70.x___, r70.z, l4.x \n"
"and r70.___w, r70.z, l4.y \n"
"ishl r70.___w, r70.w, l0.w \n"
"iadd r70.x___, r70.x, r70.w \n"
"iadd r70.x___, r70.x, r70.y \n"
"call 6 \n"
"endmain \n"
// func 7: the angles are r80 * l10 (r100) and r80 * l11 (r101). Each of
// r401..r407 is multiplied through func 0 by r41 = (cos, sin, cos, sin) of its
// own angle component; r400 is left untouched.
";twiddle 8 \n"
"func 7 \n"
"mul_ieee r100, r80, l10 \n"
"mul_ieee r101, r80, l11 \n"
"cos_vec r110, r100 \n"
"cos_vec r111, r101 \n"
"sin_vec r120, r100 \n"
"sin_vec r121, r101 \n"
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
"mov r40, r404 \n"
"mov r41.x_z_, r111.x \n"
"mov r41._y_w, r121.x \n"
"call 0 \n"
"mov r404, r40 \n"
"mov r40, r405 \n"
"mov r41.x_z_, r111.y \n"
"mov r41._y_w, r121.y \n"
"call 0 \n"
"mov r405, r40 \n"
"mov r40, r406 \n"
"mov r41.x_z_, r111.z \n"
"mov r41._y_w, r121.z \n"
"call 0 \n"
"mov r406, r40 \n"
"mov r40, r407 \n"
"mov r41.x_z_, r111.w \n"
"mov r41._y_w, r121.w \n"
"call 0 \n"
"mov r407, r40 \n"
"ret \n"
"endfunc \n"
// func 5: loads r400..r407 from g[r70.x + 256*k], k = 0..7.
";mov global buffer to register, interval is 256 \n"
"func 5 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+256] \n"
"mov r402, g[r70.x+512] \n"
"mov r403, g[r70.x+768] \n"
"mov r404, g[r70.x+1024] \n"
"mov r405, g[r70.x+1280] \n"
"mov r406, g[r70.x+1536] \n"
"mov r407, g[r70.x+1792] \n"
"ret \n"
"endfunc \n"
// NOTE(review): the IL comment below says "interval is 512", but func 6 stores
// to offsets 0/16/32/48 (r400, r404, r402, r406) and 1024/1040/1056/1072
// (r401, r405, r403, r407). The comment text is part of the kernel string and
// is left unchanged here.
";mov register to global buffer, interval is 512 \n"
"func 6 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+16], r404 \n"
"mov g[r70.x+32], r402 \n"
"mov g[r70.x+48], r406 \n"
"mov g[r70.x+1024], r401 \n"
"mov g[r70.x+1040], r405 \n"
"mov g[r70.x+1056], r403 \n"
"mov g[r70.x+1072], r407 \n"
"ret \n"
"endfunc \n"
// func 2: in-place butterfly, r50 = r50 + r51 and r51 = (old r50) - r51;
// r100 is used as scratch.
";FFT2. \n"
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
// func 3: operates in place on r60..r63. Butterflies (r60, r62) and (r61, r63),
// multiplies r63 by l1.xzxz = (0, -1, 0, -1) through func 0, then butterflies
// (r60, r61) and (r62, r63).
";FFT4. \n"
"func 3 \n"
"mov r50, r60 \n"
"mov r51, r62 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r62, r51 \n"
"mov r50, r61 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r61, r50 \n"
"mov r63, r51 \n"
"mov r40, r63 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r63, r40 \n"
"mov r50, r60 \n"
"mov r51, r61 \n"
"call 2 \n"
"mov r60, r50 \n"
"mov r61, r51 \n"
"mov r50, r62 \n"
"mov r51, r63 \n"
"call 2 \n"
"mov r62, r50 \n"
"mov r63, r51 \n"
"ret \n"
"endfunc \n"
// func 4: operates in place on r400..r407. First butterflies (r400, r404),
// (r401, r405), (r402, r406) and (r403, r407) through func 2, then rotates
// r405, r406 and r407 through func 0, and finally runs func 3 on r400..r403
// and on r404..r407.
";FFT8. \n"
"func 4 \n"
"mov r50, r400 \n"
"mov r51, r404 \n"
"call 2 \n"
"mov r400, r50 \n"
"mov r404, r51 \n"
"mov r50, r401 \n"
"mov r51, r405 \n"
"call 2 \n"
"mov r401, r50 \n"
"mov r405, r51 \n"
"mov r50, r402 \n"
"mov r51, r406 \n"
"call 2 \n"
"mov r402, r50 \n"
"mov r406, r51 \n"
"mov r50, r403 \n"
"mov r51, r407 \n"
"call 2 \n"
"mov r403, r50 \n"
"mov r407, r51 \n"
// r405 *= (1, -1) * 0.7071067691 (l1.yzyz, then scaled by l1.w).
"mov r40, r405 \n"
"mov r41, l1.yzyz \n"
"call 0 \n"
"mov r405, r40 \n"
"mul_ieee r405, r405, l1.w \n"
// r406 *= (0, -1) (l1.xzxz).
"mov r40, r406 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r406, r40 \n"
// r407 *= l1.z, then scaled by l1.w.
// NOTE(review): presumably the single-component swizzle l1.z broadcasts -1.0 to
// every component, giving (-1, -1) * 0.7071067691; confirm against the IL
// swizzle rules.
"mov r40, r407 \n"
"mov r41, l1.z \n"
"call 0 \n"
"mov r407, r40 \n"
"mul_ieee r407, r407, l1.w \n"
"mov r60, r400 \n"
"mov r61, r401 \n"
"mov r62, r402 \n"
"mov r63, r403 \n"
"call 3 \n"
"mov r400, r60 \n"
"mov r401, r61 \n"
"mov r402, r62 \n"
"mov r403, r63 \n"
"mov r60, r404 \n"
"mov r61, r405 \n"
"mov r62, r406 \n"
"mov r63, r407 \n"
"call 3 \n"
"mov r404, r60 \n"
"mov r405, r61 \n"
"mov r406, r62 \n"
"mov r407, r63 \n"
"ret \n"
"endfunc \n"
// func 0: r40 = r40 * r41 as complex products, with .xy and .zw each holding
// one (re, im) pair: re = a.x*b.x - a.y*b.y, im = a.x*b.y + a.y*b.x.
// r100 and r101 are used as scratch.
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
"end \n";

// AMD IL compute-kernel text (il_cs_2_0, 64 threads per group).
// Main loads r400..r403 from the global buffer g[] (func 4), runs the routine
// labelled FFT4 (func 3), multiplies r401..r403 by per-thread twiddle factors
// (func 6) and writes the four registers back to g[] (func 5).
// NOTE(review): unlike the neighbouring kernel strings, this one is declared as
// a non-static char array rather than a static pointer; confirm whether callers
// rely on that (e.g. sizeof) before harmonising.
const char _fft2048_fft4_tomo_source_[] = 
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.802596929e-45f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000002 \n"
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l2 = (9.809089250e-45f, -1.#QNANf, 8.407790786e-45f, 1.121038771e-44f, ) \n"
"dcl_literal l2, 0x00000007, 0xFFFFFFF8, 0x00000006, 0x00000008 \n"
"; l3 = (2048.0f, 0.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l3, 0x45000000, 0x00000000, 0x00000000, 0x00000000 \n"
"; l4 = (0.0f, -12.56637096f, -6.283185482f, -18.84955597f, ) \n"
"dcl_literal l4, 0x80000000, 0xC1490FDB, 0xC0C90FDB, 0xC196CBE4 \n"
// Index setup, with grp = vThreadGrpIdFlat0.x and tid = vTidInGrpFlat0.x:
//   r70.x = (grp & 7) << 6
//   r70.y = (grp & 0xFFFFFFF8) << 8
//   r70.z = r70.x + tid
//   r70.x = r70.y + r70.z   (base index shared by the loads and the stores)
"and r70.x___, vThreadGrpIdFlat0.x, l2.x \n"
"and r70._y__, vThreadGrpIdFlat0.x, l2.y \n"
"ishl r70.x___, r70.x, l2.z \n"
"ishl r70._y__, r70.y, l2.w \n"
"iadd r70.__z_, r70.x, vTidInGrpFlat0.x \n"
"iadd r70.x___, r70.y, r70.z \n"
// Load (func 4), FFT4 (func 3), twiddle (func 6), store (func 5).
"call 4 \n"
"call 3 \n"
"call 6 \n"
"call 5 \n"
"endmain \n"
// func 6: r100 = float(r70.z) / l3.x (2048.0f) * l4. Each of r401..r403 is
// multiplied through func 0 by r41 = (cos, sin, cos, sin) of r100.y, r100.z
// and r100.w respectively; r400 is left untouched.
";twiddle 4 \n"
"func 6 \n"
"itof r100.x___, r70.z \n"
"div_zeroop(fltmax) r100, r100.x, l3.x \n"
"mul_ieee r100, r100, l4 \n"
"cos_vec r110, r100 \n"
"sin_vec r120, r100 \n"
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
"ret \n"
"endfunc \n"
// func 4: loads r400..r403 from g[r70.x + 512*k], k = 0..3.
";mov global buffer to register, interval is 512 \n"
"func 4 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+512] \n"
"mov r402, g[r70.x+1024] \n"
"mov r403, g[r70.x+1536] \n"
"ret \n"
"endfunc \n"
// func 5: stores back at the same four offsets, but r401 goes to +1024 and
// r402 to +512, i.e. the two middle registers are swapped relative to func 4.
";mov register to global buffer, interval is 512 \n"
"func 5 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+1024], r401 \n"
"mov g[r70.x+512], r402 \n"
"mov g[r70.x+1536], r403 \n"
"ret \n"
"endfunc \n"
// func 2: in-place butterfly, r50 = r50 + r51 and r51 = (old r50) - r51;
// r100 is used as scratch.
";FFT2. \n"
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
// func 3: operates in place on r400..r403. Butterflies (r400, r402) and
// (r401, r403), multiplies r403 by l1.xzxz = (0, -1, 0, -1) through func 0,
// then butterflies (r400, r401) and (r402, r403).
";FFT4. \n"
"func 3 \n"
"mov r50, r400 \n"
"mov r51, r402 \n"
"call 2 \n"
"mov r400, r50 \n"
"mov r402, r51 \n"
"mov r50, r401 \n"
"mov r51, r403 \n"
"call 2 \n"
"mov r401, r50 \n"
"mov r403, r51 \n"
"mov r40, r403 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r403, r40 \n"
"mov r50, r400 \n"
"mov r51, r401 \n"
"call 2 \n"
"mov r400, r50 \n"
"mov r401, r51 \n"
"mov r50, r402 \n"
"mov r51, r403 \n"
"call 2 \n"
"mov r402, r50 \n"
"mov r403, r51 \n"
"ret \n"
"endfunc \n"
// func 0: r40 = r40 * r41 as complex products, with .xy and .zw each holding
// one (re, im) pair: re = a.x*b.x - a.y*b.y, im = a.x*b.y + a.y*b.x.
// r100 and r101 are used as scratch.
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
"end \n";

// AMD IL compute-kernel text (il_cs_2_0, 64 threads per group).
// Main loads r400..r403 from the global buffer g[] at offsets 0/512/1024/1536
// from a group/thread-derived index (func 4), issues fence_memory, then stores
// the four registers to consecutive slots starting at vAbsTidFlat0.x << 2
// (func 5). No arithmetic is performed on the data itself.
// NOTE(review): the loads and stores target the same buffer g[]; whether
// fence_memory alone is sufficient ordering across thread groups cannot be
// determined from this file.
static const CALchar * _transpose2048_tomo_fft_source_ =
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
"; l1 = (2.802596929e-45f, 0.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l1, 0x00000002, 0x00000000, 0x00000000, 0x00000000 \n"
"; l2 = (9.809089250e-45f, -1.#QNANf, 8.407790786e-45f, 1.121038771e-44f, ) \n"
"dcl_literal l2, 0x00000007, 0xFFFFFFF8, 0x00000006, 0x00000008 \n"
// Load-index setup, with grp = vThreadGrpIdFlat0.x and tid = vTidInGrpFlat0.x:
//   r70.x = (grp & 7) << 6
//   r70.y = (grp & 0xFFFFFFF8) << 8
//   r70.z = r70.x + tid
//   r70.x = r70.y + r70.z
"and r70.x___, vThreadGrpIdFlat0.x, l2.x \n"
"and r70._y__, vThreadGrpIdFlat0.x, l2.y \n"
"ishl r70.x___, r70.x, l2.z \n"
"ishl r70._y__, r70.y, l2.w \n"
"iadd r70.__z_, r70.x, vTidInGrpFlat0.x \n"
"iadd r70.x___, r70.y, r70.z \n"
"call 4 \n"
"fence_memory \n"
// Store index: r70.x = vAbsTidFlat0.x << 2 (l1.x is the integer 2).
"ishl r70.x___, vAbsTidFlat0.x, l1.x \n"
"call 5 \n"
"endmain \n"
// func 4: loads r400..r403 from g[r70.x + 512*k], k = 0..3.
";mov global buffer to register, interval is 512 \n"
"func 4 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+512] \n"
"mov r402, g[r70.x+1024] \n"
"mov r403, g[r70.x+1536] \n"
"ret \n"
"endfunc \n"
// NOTE(review): the IL comment below says "interval is 512", but func 5 stores
// to the consecutive offsets +0, +1, +2 and +3. The comment text is part of the
// kernel string and is left unchanged here.
";mov register to global buffer, interval is 512 \n"
"func 5 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+1], r401 \n"
"mov g[r70.x+2], r402 \n"
"mov g[r70.x+3], r403 \n"
"ret \n"
"endfunc \n"
"end \n";

// AMD IL compute-kernel text (il_cs_2_0, 64 threads per group).
// Same literals, index setup and helper functions as the 4-point FFT kernel
// string defined earlier in this file, but main only calls the load (func 4)
// and the store (func 5): the FFT4 and twiddle calls are commented out.
// NOTE(review): looks like a pass-through/debugging variant; confirm whether
// the disabled calls and the now-unused funcs 0, 2, 3 and 6 are intentional.
static const CALchar * _simple_compute_tomo_fft_source_ =
"il_cs_2_0 \n"
"dcl_num_thread_per_group 64 \n"
"; l0 = (0.0f, 1.401298464e-45f, -1.#QNANf, 2.802596929e-45f, ) \n"
"dcl_literal l0, 0x00000000, 0x00000001, 0xFFFFFFFF, 0x00000002 \n"
"; l1 = (0.0f, 1.0f, -1.0f, 0.7071067691f, ) \n"
"dcl_literal l1, 0x00000000, 0x3F800000, 0xBF800000, 0x3F3504F3 \n"
"; l2 = (9.809089250e-45f, -1.#QNANf, 8.407790786e-45f, 1.121038771e-44f, ) \n"
"dcl_literal l2, 0x00000007, 0xFFFFFFF8, 0x00000006, 0x00000008 \n"
"; l3 = (2048.0f, 0.0f, 0.0f, 0.0f, ) \n"
"dcl_literal l3, 0x45000000, 0x00000000, 0x00000000, 0x00000000 \n"
"; l4 = (0.0f, -12.56637096f, -6.283185482f, -18.84955597f, ) \n"
"dcl_literal l4, 0x80000000, 0xC1490FDB, 0xC0C90FDB, 0xC196CBE4 \n"
// Index setup, with grp = vThreadGrpIdFlat0.x and tid = vTidInGrpFlat0.x:
//   r70.x = (grp & 7) << 6
//   r70.y = (grp & 0xFFFFFFF8) << 8
//   r70.z = r70.x + tid
//   r70.x = r70.y + r70.z   (base index shared by the loads and the stores)
"and r70.x___, vThreadGrpIdFlat0.x, l2.x \n"
"and r70._y__, vThreadGrpIdFlat0.x, l2.y \n"
"ishl r70.x___, r70.x, l2.z \n"
"ishl r70._y__, r70.y, l2.w \n"
"iadd r70.__z_, r70.x, vTidInGrpFlat0.x \n"
"iadd r70.x___, r70.y, r70.z \n"
"call 4 \n"
// The FFT4 (func 3) and twiddle (func 6) calls below are disabled, so the
// registers loaded by func 4 reach func 5 unmodified.
//"call 3 \n"
//"call 6 \n"
"call 5 \n"
"endmain \n"
// func 6 (not called from main): r100 = float(r70.z) / l3.x (2048.0f) * l4;
// each of r401..r403 is multiplied through func 0 by r41 = (cos, sin, cos, sin)
// of r100.y, r100.z and r100.w respectively.
";twiddle 4 \n"
"func 6 \n"
"itof r100.x___, r70.z \n"
"div_zeroop(fltmax) r100, r100.x, l3.x \n"
"mul_ieee r100, r100, l4 \n"
"cos_vec r110, r100 \n"
"sin_vec r120, r100 \n"
"mov r40, r401 \n"
"mov r41.x_z_, r110.y \n"
"mov r41._y_w, r120.y \n"
"call 0 \n"
"mov r401, r40 \n"
"mov r40, r402 \n"
"mov r41.x_z_, r110.z \n"
"mov r41._y_w, r120.z \n"
"call 0 \n"
"mov r402, r40 \n"
"mov r40, r403 \n"
"mov r41.x_z_, r110.w \n"
"mov r41._y_w, r120.w \n"
"call 0 \n"
"mov r403, r40 \n"
"ret \n"
"endfunc \n"
// func 4: loads r400..r403 from g[r70.x + 512*k], k = 0..3.
";mov global buffer to register, interval is 512 \n"
"func 4 \n"
"mov r400, g[r70.x+0] \n"
"mov r401, g[r70.x+512] \n"
"mov r402, g[r70.x+1024] \n"
"mov r403, g[r70.x+1536] \n"
"ret \n"
"endfunc \n"
// func 5: stores back at the same four offsets, but r401 goes to +1024 and
// r402 to +512, i.e. the two middle registers are swapped relative to func 4.
";mov register to global buffer, interval is 512 \n"
"func 5 \n"
"mov g[r70.x+0], r400 \n"
"mov g[r70.x+1024], r401 \n"
"mov g[r70.x+512], r402 \n"
"mov g[r70.x+1536], r403 \n"
"ret \n"
"endfunc \n"
// func 2 (only reachable through func 3): in-place butterfly,
// r50 = r50 + r51 and r51 = (old r50) - r51; r100 is used as scratch.
";FFT2. \n"
"func 2 \n"
"mov r100, r50 \n"
"add r50, r100, r51 \n"
"sub r51, r100, r51 \n"
"ret \n"
"endfunc \n"
// func 3 (not called from main): operates in place on r400..r403. Butterflies
// (r400, r402) and (r401, r403), multiplies r403 by l1.xzxz = (0, -1, 0, -1)
// through func 0, then butterflies (r400, r401) and (r402, r403).
";FFT4. \n"
"func 3 \n"
"mov r50, r400 \n"
"mov r51, r402 \n"
"call 2 \n"
"mov r400, r50 \n"
"mov r402, r51 \n"
"mov r50, r401 \n"
"mov r51, r403 \n"
"call 2 \n"
"mov r401, r50 \n"
"mov r403, r51 \n"
"mov r40, r403 \n"
"mov r41, l1.xzxz \n"
"call 0 \n"
"mov r403, r40 \n"
"mov r50, r400 \n"
"mov r51, r401 \n"
"call 2 \n"
"mov r400, r50 \n"
"mov r401, r51 \n"
"mov r50, r402 \n"
"mov r51, r403 \n"
"call 2 \n"
"mov r402, r50 \n"
"mov r403, r51 \n"
"ret \n"
"endfunc \n"
// func 0 (only reachable through funcs 3 and 6): r40 = r40 * r41 as complex
// products, with .xy and .zw each holding one (re, im) pair:
// re = a.x*b.x - a.y*b.y, im = a.x*b.y + a.y*b.x. r100/r101 are scratch.
"func 0 \n"
"mul_ieee r100, r40, r41 \n"
"mul_ieee r101, r40, r41.yxwz \n"
"sub r40.x_z_, r100.xxzz, r100.yyww \n"
"add r40._y_w, r101.xxzz, r101.yyww \n"
"ret \n"
"endfunc \n"
"end \n";

    }
}
#endif // _FFT_IL_SOURCE_H_